Thursday, March 21, 2019 10:11 AM, Viacheslav Ovsiienko:
> Subject: [PATCH 05/14] net/mlx5: add multiport IB device support to probing
> 
> mlx5_pci_probe() routine is refactored to probe the ports of found
> Infiniband devices. All active ports (with attached network interface),
> belonging to the same Infiniband device will use the signle shared Infiniband
> context of that device.
> 
> Signed-off-by: Viacheslav Ovsiienko <viachesl...@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c | 302 +++++++++++++++++++++++++++++++++---
> ------------
>  1 file changed, 210 insertions(+), 92 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> 89c30af..100e9f4 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -130,6 +130,16 @@
>  /** Driver-specific log messages type. */  int mlx5_logtype;
> 
> +/** Data associated with devices to spawn. */ struct
> +mlx5_dev_spawn_data {
> +     uint32_t ifindex; /**< Network interface index. */
> +     uint32_t max_port; /**< IB device maximal port index. */
> +     uint32_t ibv_port; /**< IB device physical port index. */
> +     struct mlx5_switch_info info; /**< Switch information. */
> +     struct ibv_device *ibv_dev; /**< Associated IB device. */
> +     struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */ };
> +
>  /**
>   * Prepare shared data between primary and secondary process.
>   */
> @@ -716,12 +726,10 @@
>   *
>   * @param dpdk_dev
>   *   Backing DPDK device.
> - * @param ibv_dev
> - *   Verbs device.
> + * @param spawn
> + *   Verbs device parameters (name, port, switch_info) to spawn.
>   * @param config
>   *   Device configuration parameters.
> - * @param[in] switch_info
> - *   Switch properties of Ethernet device.
>   *
>   * @return
>   *   A valid Ethernet device object on success, NULL otherwise and rte_errno
> @@ -732,10 +740,11 @@
>   */
>  static struct rte_eth_dev *
>  mlx5_dev_spawn(struct rte_device *dpdk_dev,
> -            struct ibv_device *ibv_dev,
> -            struct mlx5_dev_config config,
> -            const struct mlx5_switch_info *switch_info)
> +            struct mlx5_dev_spawn_data *spawn,
> +            struct mlx5_dev_config config)
>  {
> +     const struct mlx5_switch_info *switch_info = &spawn->info;
> +     struct ibv_device *ibv_dev = spawn->ibv_dev;
>       struct ibv_context *ctx = NULL;
>       struct ibv_device_attr_ex attr;
>       struct ibv_port_attr port_attr;
> @@ -952,7 +961,7 @@
>               return eth_dev;
>       }
>       /* Check port status. */
> -     err = mlx5_glue->query_port(ctx, 1, &port_attr);
> +     err = mlx5_glue->query_port(ctx, spawn->ibv_port, &port_attr);
>       if (err) {
>               DRV_LOG(ERR, "port query failed: %s", strerror(err));
>               goto error;
> @@ -1316,14 +1325,6 @@
>       return NULL;
>  }
> 
> -/** Data associated with devices to spawn. */ -struct
> mlx5_dev_spawn_data {
> -     unsigned int ifindex; /**< Network interface index. */
> -     struct mlx5_switch_info info; /**< Switch information. */
> -     struct ibv_device *ibv_dev; /**< Associated IB device. */
> -     struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */
> -};
> -
>  /**
>   * Comparison callback to sort device data.
>   *
> @@ -1380,7 +1381,9 @@ struct mlx5_dev_spawn_data {
>              struct rte_pci_device *pci_dev)  {
>       struct ibv_device **ibv_list;
> -     unsigned int n = 0;
> +     unsigned int nd = 0;
> +     unsigned int np = 0;
> +     unsigned int ns = 0;

This fields names are not informative. Find a better ones. 

>       struct mlx5_dev_config dev_config;
>       int ret;
> 
> @@ -1392,8 +1395,14 @@ struct mlx5_dev_spawn_data {
>               DRV_LOG(ERR, "cannot list devices, is ib_uverbs loaded?");
>               return -rte_errno;
>       }
> -
> +     /*
> +      * First scan the list of all Infiniband devices to find
> +      * matching ones, gathering into the list.
> +      */
>       struct ibv_device *ibv_match[ret + 1];
> +     int nl_route = -1;
> +     int nl_rdma = -1;
> +     unsigned int i;
> 
>       while (ret-- > 0) {
>               struct rte_pci_addr pci_addr;
> @@ -1408,77 +1417,183 @@ struct mlx5_dev_spawn_data {
>                       continue;
>               DRV_LOG(INFO, "PCI information matches for device
> \"%s\"",
>                       ibv_list[ret]->name);
> -             ibv_match[n++] = ibv_list[ret];
> +             ibv_match[nd++] = ibv_list[ret];
> +     }
> +     ibv_match[nd] = NULL;
> +     if (!nd) {
> +             /* No device macthes, just complain and bail out. */
> +             mlx5_glue->free_device_list(ibv_list);
> +             DRV_LOG(WARNING,
> +                     "no Verbs device matches PCI device " PCI_PRI_FMT
> ","
> +                     " are kernel drivers loaded?",
> +                     pci_dev->addr.domain, pci_dev->addr.bus,
> +                     pci_dev->addr.devid, pci_dev->addr.function);
> +             rte_errno = ENOENT;
> +             ret = -rte_errno;
> +             return ret;
> +     }
> +     nl_route = mlx5_nl_init(NETLINK_ROUTE);
> +     nl_rdma = mlx5_nl_init(NETLINK_RDMA);
> +     if (nd == 1) {
> +             /*
> +              * Found single matching device may have multiple ports.
> +              * Each port may be representor, we have to check the port
> +              * number and check the representors existence.
> +              */
> +             if (nl_rdma >= 0)
> +                     np = mlx5_nl_portnum(nl_rdma, ibv_match[0]-
> >name);
> +             if (!np)
> +                     DRV_LOG(WARNING, "can not get IB device \"%s\""
> +                                      " ports number", ibv_match[0]-
> >name);

This warning is misleading. On old kernels it is expected to have multiple IB 
devices instead of a single one w/ multiple ports.
The level should be changed for debug, and the syntax to express it is not an 
error. 

>       }
> -     ibv_match[n] = NULL;
> -
> -     struct mlx5_dev_spawn_data list[n];
> -     int nl_route = n ? mlx5_nl_init(NETLINK_ROUTE) : -1;
> -     int nl_rdma = n ? mlx5_nl_init(NETLINK_RDMA) : -1;
> -     unsigned int i;
> -     unsigned int u;
> -
>       /*
> -      * The existence of several matching entries (n > 1) means port
> -      * representors have been instantiated. No existing Verbs call nor
> -      * /sys entries can tell them apart, this can only be done through
> -      * Netlink calls assuming kernel drivers are recent enough to
> -      * support them.
> -      *
> -      * In the event of identification failure through Netlink, try again
> -      * through sysfs, then either:
> -      *
> -      * 1. No device matches (n == 0), complain and bail out.
> -      * 2. A single IB device matches (n == 1) and is not a representor,
> -      *    assume no switch support.
> -      * 3. Otherwise no safe assumptions can be made; complain louder
> and
> -      *    bail out.
> +      * Now we can determine the maximal
> +      * amount of devices to be spawned.
>        */
> -     for (i = 0; i != n; ++i) {
> -             list[i].ibv_dev = ibv_match[i];
> -             list[i].eth_dev = NULL;
> -             if (nl_rdma < 0)
> -                     list[i].ifindex = 0;
> -             else
> -                     list[i].ifindex = mlx5_nl_ifindex
> -                             (nl_rdma, list[i].ibv_dev->name, 1);
> -             if (nl_route < 0 ||
> -                 !list[i].ifindex ||
> -                 mlx5_nl_switch_info(nl_route, list[i].ifindex,
> -                                     &list[i].info) ||
> -                 ((!list[i].info.representor && !list[i].info.master) &&
> -                  mlx5_sysfs_switch_info(list[i].ifindex, &list[i].info))) {
> -                     list[i].ifindex = 0;
> -                     memset(&list[i].info, 0, sizeof(list[i].info));
> -                     continue;
> +     struct mlx5_dev_spawn_data list[np ? np : nd];
> +
> +     if (np > 1) {
> +             /*
> +              * Signle IB device with multiple ports found,
> +              * it may be E-Switch master device and representors.
> +              * We have to perform identification trough the ports.
> +              */
> +             assert(nl_rdma >= 0);
> +             assert(ns == 0);
> +             assert(nd == 1);
> +             for (i = 1; i <= np; ++i) {
> +                     list[ns].max_port = np;
> +                     list[ns].ibv_port = i;
> +                     list[ns].ibv_dev = ibv_match[0];
> +                     list[ns].eth_dev = NULL;
> +                     list[ns].ifindex = mlx5_nl_ifindex
> +                                     (nl_rdma, list[ns].ibv_dev->name, i);
> +                     if (!list[ns].ifindex) {
> +                             /*
> +                              * No network interface index found for the
> +                              * specified port, it means there is no
> +                              * representor on this port. It's OK,
> +                              * there can be disabled ports, for example
> +                              * if sriov_numvfs < sriov_totalvfs.
> +                              */
> +                             continue;
> +                     }
> +                     ret = -1;
> +                     if (nl_route >= 0)
> +                             ret = mlx5_nl_switch_info
> +                                            (nl_route,
> +                                             list[ns].ifindex,
> +                                             &list[ns].info);
> +                     if (ret || (!list[ns].info.representor &&
> +                                 !list[ns].info.master)) {
> +                             /*
> +                              * We failed to recognize representors with
> +                              * Netlink, let's try to perform the task
> +                              * with sysfs.
> +                              */
> +                             ret =  mlx5_sysfs_switch_info
> +                                             (list[ns].ifindex,
> +                                              &list[ns].info);
> +                     }
> +                     if (!ret && (list[ns].info.representor ^
> +                                  list[ns].info.master))
> +                             ns++;
>               }
> -     }
> -     if (nl_rdma >= 0)
> -             close(nl_rdma);
> -     if (nl_route >= 0)
> -             close(nl_route);
> -     /* Count unidentified devices. */
> -     for (u = 0, i = 0; i != n; ++i)
> -             if (!list[i].info.master && !list[i].info.representor)
> -                     ++u;
> -     if (u) {
> -             if (n == 1 && u == 1) {
> -                     /* Case #2. */
> -                     DRV_LOG(INFO, "no switch support detected");
> -             } else {
> -                     /* Case #3. */
> +             if (!ns) {
> +                     DRV_LOG(ERR,
> +                             "unable to recognize master/representors"
> +                             " on the IB device with multiple ports");
> +                     rte_errno = ENOENT;
> +                     ret = -rte_errno;
> +                     goto exit;
> +             }
> +     } else {
> +             /*
> +              * The existence of several matching entries (nd > 1) means
> +              * port representors have been instantiated. No existing
> Verbs
> +              * call nor sysfs entries can tell them apart, this can only
> +              * be done through Netlink calls assuming kernel drivers are
> +              * recent enough to support them.
> +              *
> +              * In the event of identification failure through Netlink,
> +              * try again through sysfs, then:
> +              *
> +              * 1. A single IB device matches (nd == 1) with single
> +              *    port (np=0/1) and is not a representor, assume
> +              *    no switch support.
> +              *
> +              * 2. Otherwise no safe assumptions can be made;
> +              *    complain louder and bail out.
> +              */
> +             np = 1;
> +             for (i = 0; i != nd; ++i) {
> +                     memset(&list[ns].info, 0, sizeof(list[ns].info));
> +                     list[ns].max_port = 1;
> +                     list[ns].ibv_port = 1;
> +                     list[ns].ibv_dev = ibv_match[i];
> +                     list[ns].eth_dev = NULL;
> +                     list[ns].ifindex = 0;
> +                     if (nl_rdma >= 0)
> +                             list[ns].ifindex = mlx5_nl_ifindex
> +                                     (nl_rdma, list[ns].ibv_dev->name, 1);
> +                     if (!list[ns].ifindex) {
> +                             /*
> +                              * No network interface index found for the
> +                              * specified device, it means there it is not
> +                              * a representor/master.
> +                              */
> +                             continue;
> +                     }
> +                     ret = -1;
> +                     if (nl_route >= 0)
> +                             ret = mlx5_nl_switch_info
> +                                            (nl_route,
> +                                             list[ns].ifindex,
> +                                             &list[ns].info);
> +                     if (ret || (!list[ns].info.representor &&
> +                                 !list[ns].info.master)) {
> +                             /*
> +                              * We failed to recognize representors with
> +                              * Netlink, let's try to perform the task
> +                              * with sysfs.
> +                              */
> +                             ret =  mlx5_sysfs_switch_info
> +                                             (list[ns].ifindex,
> +                                              &list[ns].info);
> +                     }
> +                     if (!ret && (list[ns].info.representor ^
> +                                  list[ns].info.master)) {
> +                             ns++;
> +                     } else if ((nd == 1) &&
> +                                !list[ns].info.representor &&
> +                                !list[ns].info.master) {
> +                             /*
> +                              * Single IB device with
> +                              * one physical port and
> +                              * attached network device.
> +                              * May be SRIOV is not enabled
> +                              * or there is no representors.
> +                              */
> +                             DRV_LOG(INFO, "no E-Switch support
> detected");
> +                             ns++;
> +                             break;
> +                     }
> +             }
> +             if (!ns) {
>                       DRV_LOG(ERR,
> -                             "unable to tell which of the matching
> devices"
> -                             " is the master (lack of kernel support?)");
> -                     n = 0;
> +                             "unable to recognize master/representors"
> +                             " on the multiple IB devices");
> +                     rte_errno = ENOENT;
> +                     ret = -rte_errno;
> +                     goto exit;
>               }
>       }
> +     assert(ns);
>       /*
>        * Sort list to probe devices in natural order for users convenience
>        * (i.e. master first, then representors from lowest to highest ID).
>        */
> -     if (n)
> -             qsort(list, n, sizeof(*list), mlx5_dev_spawn_data_cmp);
> +     qsort(list, ns, sizeof(*list), mlx5_dev_spawn_data_cmp);
>       /* Default configuration. */
>       dev_config = (struct mlx5_dev_config){
>               .hw_padding = 0,
> @@ -1497,7 +1612,7 @@ struct mlx5_dev_spawn_data {
>                       .min_rxqs_num = MLX5_MPRQ_MIN_RXQS,
>               },
>       };
> -     /* Device speicific configuration. */
> +     /* Device specific configuration. */
>       switch (pci_dev->id.device_id) {
>       case PCI_DEVICE_ID_MELLANOX_CONNECTX5BF:
>               dev_config.txqs_vec =
> MLX5_VPMD_MAX_TXQS_BLUEFIELD; @@ -1514,12 +1629,12 @@ struct
> mlx5_dev_spawn_data {
>       /* Set architecture-dependent default value if unset. */
>       if (dev_config.txqs_vec == MLX5_ARG_UNSET)
>               dev_config.txqs_vec = MLX5_VPMD_MAX_TXQS;
> -     for (i = 0; i != n; ++i) {
> +     for (i = 0; i != ns; ++i) {
>               uint32_t restore;
> 
>               list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
> -                                              list[i].ibv_dev, dev_config,
> -                                              &list[i].info);
> +                                              &list[i],
> +                                              dev_config);
>               if (!list[i].eth_dev) {
>                       if (rte_errno != EBUSY && rte_errno != EEXIST)
>                               break;
> @@ -1532,16 +1647,7 @@ struct mlx5_dev_spawn_data {
>               list[i].eth_dev->data->dev_flags |= restore;
>               rte_eth_dev_probing_finish(list[i].eth_dev);
>       }
> -     mlx5_glue->free_device_list(ibv_list);
> -     if (!n) {
> -             DRV_LOG(WARNING,
> -                     "no Verbs device matches PCI device " PCI_PRI_FMT
> ","
> -                     " are kernel drivers loaded?",
> -                     pci_dev->addr.domain, pci_dev->addr.bus,
> -                     pci_dev->addr.devid, pci_dev->addr.function);
> -             rte_errno = ENOENT;
> -             ret = -rte_errno;
> -     } else if (i != n) {
> +     if (i != ns) {
>               DRV_LOG(ERR,
>                       "probe of PCI device " PCI_PRI_FMT " aborted after"
>                       " encountering an error: %s",
> @@ -1563,6 +1669,18 @@ struct mlx5_dev_spawn_data {
>       } else {
>               ret = 0;
>       }
> +exit:
> +     /*
> +      * Do the routine cleanup:
> +      * - close opened Netlink sockets
> +      * - free the Infiniband device list
> +      */
> +     if (nl_rdma >= 0)
> +             close(nl_rdma);
> +     if (nl_route >= 0)
> +             close(nl_route);
> +     assert(ibv_list);
> +     mlx5_glue->free_device_list(ibv_list);
>       return ret;
>  }
> 
> --
> 1.8.3.1

Reply via email to