Thursday, March 21, 2019 10:11 AM, Viacheslav Ovsiienko: > Subject: [PATCH 05/14] net/mlx5: add multiport IB device support to probing > > mlx5_pci_probe() routine is refactored to probe the ports of found > Infiniband devices. All active ports (with attached network interface), > belonging to the same Infiniband device will use the signle shared Infiniband > context of that device. > > Signed-off-by: Viacheslav Ovsiienko <viachesl...@mellanox.com> > --- > drivers/net/mlx5/mlx5.c | 302 +++++++++++++++++++++++++++++++++--- > ------------ > 1 file changed, 210 insertions(+), 92 deletions(-) > > diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index > 89c30af..100e9f4 100644 > --- a/drivers/net/mlx5/mlx5.c > +++ b/drivers/net/mlx5/mlx5.c > @@ -130,6 +130,16 @@ > /** Driver-specific log messages type. */ int mlx5_logtype; > > +/** Data associated with devices to spawn. */ struct > +mlx5_dev_spawn_data { > + uint32_t ifindex; /**< Network interface index. */ > + uint32_t max_port; /**< IB device maximal port index. */ > + uint32_t ibv_port; /**< IB device physical port index. */ > + struct mlx5_switch_info info; /**< Switch information. */ > + struct ibv_device *ibv_dev; /**< Associated IB device. */ > + struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */ }; > + > /** > * Prepare shared data between primary and secondary process. > */ > @@ -716,12 +726,10 @@ > * > * @param dpdk_dev > * Backing DPDK device. > - * @param ibv_dev > - * Verbs device. > + * @param spawn > + * Verbs device parameters (name, port, switch_info) to spawn. > * @param config > * Device configuration parameters. > - * @param[in] switch_info > - * Switch properties of Ethernet device. > * > * @return > * A valid Ethernet device object on success, NULL otherwise and rte_errno > @@ -732,10 +740,11 @@ > */ > static struct rte_eth_dev * > mlx5_dev_spawn(struct rte_device *dpdk_dev, > - struct ibv_device *ibv_dev, > - struct mlx5_dev_config config, > - const struct mlx5_switch_info *switch_info) > + struct mlx5_dev_spawn_data *spawn, > + struct mlx5_dev_config config) > { > + const struct mlx5_switch_info *switch_info = &spawn->info; > + struct ibv_device *ibv_dev = spawn->ibv_dev; > struct ibv_context *ctx = NULL; > struct ibv_device_attr_ex attr; > struct ibv_port_attr port_attr; > @@ -952,7 +961,7 @@ > return eth_dev; > } > /* Check port status. */ > - err = mlx5_glue->query_port(ctx, 1, &port_attr); > + err = mlx5_glue->query_port(ctx, spawn->ibv_port, &port_attr); > if (err) { > DRV_LOG(ERR, "port query failed: %s", strerror(err)); > goto error; > @@ -1316,14 +1325,6 @@ > return NULL; > } > > -/** Data associated with devices to spawn. */ -struct > mlx5_dev_spawn_data { > - unsigned int ifindex; /**< Network interface index. */ > - struct mlx5_switch_info info; /**< Switch information. */ > - struct ibv_device *ibv_dev; /**< Associated IB device. */ > - struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */ > -}; > - > /** > * Comparison callback to sort device data. > * > @@ -1380,7 +1381,9 @@ struct mlx5_dev_spawn_data { > struct rte_pci_device *pci_dev) { > struct ibv_device **ibv_list; > - unsigned int n = 0; > + unsigned int nd = 0; > + unsigned int np = 0; > + unsigned int ns = 0;
This fields names are not informative. Find a better ones. > struct mlx5_dev_config dev_config; > int ret; > > @@ -1392,8 +1395,14 @@ struct mlx5_dev_spawn_data { > DRV_LOG(ERR, "cannot list devices, is ib_uverbs loaded?"); > return -rte_errno; > } > - > + /* > + * First scan the list of all Infiniband devices to find > + * matching ones, gathering into the list. > + */ > struct ibv_device *ibv_match[ret + 1]; > + int nl_route = -1; > + int nl_rdma = -1; > + unsigned int i; > > while (ret-- > 0) { > struct rte_pci_addr pci_addr; > @@ -1408,77 +1417,183 @@ struct mlx5_dev_spawn_data { > continue; > DRV_LOG(INFO, "PCI information matches for device > \"%s\"", > ibv_list[ret]->name); > - ibv_match[n++] = ibv_list[ret]; > + ibv_match[nd++] = ibv_list[ret]; > + } > + ibv_match[nd] = NULL; > + if (!nd) { > + /* No device macthes, just complain and bail out. */ > + mlx5_glue->free_device_list(ibv_list); > + DRV_LOG(WARNING, > + "no Verbs device matches PCI device " PCI_PRI_FMT > "," > + " are kernel drivers loaded?", > + pci_dev->addr.domain, pci_dev->addr.bus, > + pci_dev->addr.devid, pci_dev->addr.function); > + rte_errno = ENOENT; > + ret = -rte_errno; > + return ret; > + } > + nl_route = mlx5_nl_init(NETLINK_ROUTE); > + nl_rdma = mlx5_nl_init(NETLINK_RDMA); > + if (nd == 1) { > + /* > + * Found single matching device may have multiple ports. > + * Each port may be representor, we have to check the port > + * number and check the representors existence. > + */ > + if (nl_rdma >= 0) > + np = mlx5_nl_portnum(nl_rdma, ibv_match[0]- > >name); > + if (!np) > + DRV_LOG(WARNING, "can not get IB device \"%s\"" > + " ports number", ibv_match[0]- > >name); This warning is misleading. On old kernels it is expected to have multiple IB devices instead of a single one w/ multiple ports. The level should be changed for debug, and the syntax to express it is not an error. > } > - ibv_match[n] = NULL; > - > - struct mlx5_dev_spawn_data list[n]; > - int nl_route = n ? mlx5_nl_init(NETLINK_ROUTE) : -1; > - int nl_rdma = n ? mlx5_nl_init(NETLINK_RDMA) : -1; > - unsigned int i; > - unsigned int u; > - > /* > - * The existence of several matching entries (n > 1) means port > - * representors have been instantiated. No existing Verbs call nor > - * /sys entries can tell them apart, this can only be done through > - * Netlink calls assuming kernel drivers are recent enough to > - * support them. > - * > - * In the event of identification failure through Netlink, try again > - * through sysfs, then either: > - * > - * 1. No device matches (n == 0), complain and bail out. > - * 2. A single IB device matches (n == 1) and is not a representor, > - * assume no switch support. > - * 3. Otherwise no safe assumptions can be made; complain louder > and > - * bail out. > + * Now we can determine the maximal > + * amount of devices to be spawned. > */ > - for (i = 0; i != n; ++i) { > - list[i].ibv_dev = ibv_match[i]; > - list[i].eth_dev = NULL; > - if (nl_rdma < 0) > - list[i].ifindex = 0; > - else > - list[i].ifindex = mlx5_nl_ifindex > - (nl_rdma, list[i].ibv_dev->name, 1); > - if (nl_route < 0 || > - !list[i].ifindex || > - mlx5_nl_switch_info(nl_route, list[i].ifindex, > - &list[i].info) || > - ((!list[i].info.representor && !list[i].info.master) && > - mlx5_sysfs_switch_info(list[i].ifindex, &list[i].info))) { > - list[i].ifindex = 0; > - memset(&list[i].info, 0, sizeof(list[i].info)); > - continue; > + struct mlx5_dev_spawn_data list[np ? np : nd]; > + > + if (np > 1) { > + /* > + * Signle IB device with multiple ports found, > + * it may be E-Switch master device and representors. > + * We have to perform identification trough the ports. > + */ > + assert(nl_rdma >= 0); > + assert(ns == 0); > + assert(nd == 1); > + for (i = 1; i <= np; ++i) { > + list[ns].max_port = np; > + list[ns].ibv_port = i; > + list[ns].ibv_dev = ibv_match[0]; > + list[ns].eth_dev = NULL; > + list[ns].ifindex = mlx5_nl_ifindex > + (nl_rdma, list[ns].ibv_dev->name, i); > + if (!list[ns].ifindex) { > + /* > + * No network interface index found for the > + * specified port, it means there is no > + * representor on this port. It's OK, > + * there can be disabled ports, for example > + * if sriov_numvfs < sriov_totalvfs. > + */ > + continue; > + } > + ret = -1; > + if (nl_route >= 0) > + ret = mlx5_nl_switch_info > + (nl_route, > + list[ns].ifindex, > + &list[ns].info); > + if (ret || (!list[ns].info.representor && > + !list[ns].info.master)) { > + /* > + * We failed to recognize representors with > + * Netlink, let's try to perform the task > + * with sysfs. > + */ > + ret = mlx5_sysfs_switch_info > + (list[ns].ifindex, > + &list[ns].info); > + } > + if (!ret && (list[ns].info.representor ^ > + list[ns].info.master)) > + ns++; > } > - } > - if (nl_rdma >= 0) > - close(nl_rdma); > - if (nl_route >= 0) > - close(nl_route); > - /* Count unidentified devices. */ > - for (u = 0, i = 0; i != n; ++i) > - if (!list[i].info.master && !list[i].info.representor) > - ++u; > - if (u) { > - if (n == 1 && u == 1) { > - /* Case #2. */ > - DRV_LOG(INFO, "no switch support detected"); > - } else { > - /* Case #3. */ > + if (!ns) { > + DRV_LOG(ERR, > + "unable to recognize master/representors" > + " on the IB device with multiple ports"); > + rte_errno = ENOENT; > + ret = -rte_errno; > + goto exit; > + } > + } else { > + /* > + * The existence of several matching entries (nd > 1) means > + * port representors have been instantiated. No existing > Verbs > + * call nor sysfs entries can tell them apart, this can only > + * be done through Netlink calls assuming kernel drivers are > + * recent enough to support them. > + * > + * In the event of identification failure through Netlink, > + * try again through sysfs, then: > + * > + * 1. A single IB device matches (nd == 1) with single > + * port (np=0/1) and is not a representor, assume > + * no switch support. > + * > + * 2. Otherwise no safe assumptions can be made; > + * complain louder and bail out. > + */ > + np = 1; > + for (i = 0; i != nd; ++i) { > + memset(&list[ns].info, 0, sizeof(list[ns].info)); > + list[ns].max_port = 1; > + list[ns].ibv_port = 1; > + list[ns].ibv_dev = ibv_match[i]; > + list[ns].eth_dev = NULL; > + list[ns].ifindex = 0; > + if (nl_rdma >= 0) > + list[ns].ifindex = mlx5_nl_ifindex > + (nl_rdma, list[ns].ibv_dev->name, 1); > + if (!list[ns].ifindex) { > + /* > + * No network interface index found for the > + * specified device, it means there it is not > + * a representor/master. > + */ > + continue; > + } > + ret = -1; > + if (nl_route >= 0) > + ret = mlx5_nl_switch_info > + (nl_route, > + list[ns].ifindex, > + &list[ns].info); > + if (ret || (!list[ns].info.representor && > + !list[ns].info.master)) { > + /* > + * We failed to recognize representors with > + * Netlink, let's try to perform the task > + * with sysfs. > + */ > + ret = mlx5_sysfs_switch_info > + (list[ns].ifindex, > + &list[ns].info); > + } > + if (!ret && (list[ns].info.representor ^ > + list[ns].info.master)) { > + ns++; > + } else if ((nd == 1) && > + !list[ns].info.representor && > + !list[ns].info.master) { > + /* > + * Single IB device with > + * one physical port and > + * attached network device. > + * May be SRIOV is not enabled > + * or there is no representors. > + */ > + DRV_LOG(INFO, "no E-Switch support > detected"); > + ns++; > + break; > + } > + } > + if (!ns) { > DRV_LOG(ERR, > - "unable to tell which of the matching > devices" > - " is the master (lack of kernel support?)"); > - n = 0; > + "unable to recognize master/representors" > + " on the multiple IB devices"); > + rte_errno = ENOENT; > + ret = -rte_errno; > + goto exit; > } > } > + assert(ns); > /* > * Sort list to probe devices in natural order for users convenience > * (i.e. master first, then representors from lowest to highest ID). > */ > - if (n) > - qsort(list, n, sizeof(*list), mlx5_dev_spawn_data_cmp); > + qsort(list, ns, sizeof(*list), mlx5_dev_spawn_data_cmp); > /* Default configuration. */ > dev_config = (struct mlx5_dev_config){ > .hw_padding = 0, > @@ -1497,7 +1612,7 @@ struct mlx5_dev_spawn_data { > .min_rxqs_num = MLX5_MPRQ_MIN_RXQS, > }, > }; > - /* Device speicific configuration. */ > + /* Device specific configuration. */ > switch (pci_dev->id.device_id) { > case PCI_DEVICE_ID_MELLANOX_CONNECTX5BF: > dev_config.txqs_vec = > MLX5_VPMD_MAX_TXQS_BLUEFIELD; @@ -1514,12 +1629,12 @@ struct > mlx5_dev_spawn_data { > /* Set architecture-dependent default value if unset. */ > if (dev_config.txqs_vec == MLX5_ARG_UNSET) > dev_config.txqs_vec = MLX5_VPMD_MAX_TXQS; > - for (i = 0; i != n; ++i) { > + for (i = 0; i != ns; ++i) { > uint32_t restore; > > list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device, > - list[i].ibv_dev, dev_config, > - &list[i].info); > + &list[i], > + dev_config); > if (!list[i].eth_dev) { > if (rte_errno != EBUSY && rte_errno != EEXIST) > break; > @@ -1532,16 +1647,7 @@ struct mlx5_dev_spawn_data { > list[i].eth_dev->data->dev_flags |= restore; > rte_eth_dev_probing_finish(list[i].eth_dev); > } > - mlx5_glue->free_device_list(ibv_list); > - if (!n) { > - DRV_LOG(WARNING, > - "no Verbs device matches PCI device " PCI_PRI_FMT > "," > - " are kernel drivers loaded?", > - pci_dev->addr.domain, pci_dev->addr.bus, > - pci_dev->addr.devid, pci_dev->addr.function); > - rte_errno = ENOENT; > - ret = -rte_errno; > - } else if (i != n) { > + if (i != ns) { > DRV_LOG(ERR, > "probe of PCI device " PCI_PRI_FMT " aborted after" > " encountering an error: %s", > @@ -1563,6 +1669,18 @@ struct mlx5_dev_spawn_data { > } else { > ret = 0; > } > +exit: > + /* > + * Do the routine cleanup: > + * - close opened Netlink sockets > + * - free the Infiniband device list > + */ > + if (nl_rdma >= 0) > + close(nl_rdma); > + if (nl_route >= 0) > + close(nl_route); > + assert(ibv_list); > + mlx5_glue->free_device_list(ibv_list); > return ret; > } > > -- > 1.8.3.1