Hi Adrien,

I couldn't find your original email in my inbox, so I have to start a new
thread here.

> The current PCI probing method is not aware of Verbs port representors,
> which appear as standard Verbs devices bound to the same PCI address and
> cannot be distinguished.
> 
> Problem is that more often than not, the wrong Verbs device is used,
> resulting in unexpected traffic.
> 
> This patch adds necessary heuristics to bind affected driver instances to
> the intended (i.e. non-representor) device.
> 
> (Patch based on prior work from Yuanhan Liu)
> 
> Signed-off-by: Adrien Mazarguil <adrien.mazarg...@6wind.com>
> ---
>  drivers/net/mlx5/mlx5.c | 61 +++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 57 insertions(+), 4 deletions(-)
> Patch diffmbox
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index d57e8118c..d3a298332 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -1155,6 +1155,32 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  }
>  
>  /**
> + * Comparison callback to sort Verbs device names.
> + *
> + * This is meant to be used with qsort().
> + *
> + * @param a[in]
> + *   Pointer to pointer to first Verbs device.
> + * @param b[in]
> + *   Pointer to pointer to second Verbs device.
> + *
> + * @return
> + *   0 if both names are equal, less than 0 if the first argument is less
> + *   than the second, greater than 0 otherwise.
> + */
> +static int
> +mlx5_cmp_ibv_name(const void *a, const void *b)
> +{
> +     const char *name_a = (*(const struct ibv_device *const *)a)->name;
> +     const char *name_b = (*(const struct ibv_device *const *)b)->name;
> +     size_t i = 0;
> +
> +     while (name_a[i] && name_a[i] == name_b[i])
> +             ++i;
> +     return atoi(name_a + i) - atoi(name_b + i);

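Comparing names whose numeric suffixes are "1" and "10" (e.g. "mlx5_1" and
"mlx5_10") will return 0 here: the loop skips the common prefix, including the
shared "1", so atoi() is called on "" and "0", which both yield 0. Does this
matter?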

> +}
> +
> +/**
>   * DPDK callback to register a PCI device.
>   *
>   * This function creates an Ethernet device for each port of a given
> @@ -1174,6 +1200,7 @@  mlx5_pci_probe(struct rte_pci_driver *pci_drv 
> __rte_unused,
>  {
>       struct ibv_device **ibv_list;
>       struct rte_eth_dev **eth_list = NULL;
> +     int n = 0;
>       int vf;
>       int ret;
>  
> @@ -1195,6 +1222,9 @@  mlx5_pci_probe(struct rte_pci_driver *pci_drv 
> __rte_unused,
>               DRV_LOG(ERR, "cannot list devices, is ib_uverbs loaded?");
>               return -rte_errno;
>       }
> +
> +     struct ibv_device *ibv_match[ret + 1];
> +
>       while (ret-- > 0) {
>               struct rte_pci_addr pci_addr;
>  
> @@ -1206,12 +1236,35 @@  mlx5_pci_probe(struct rte_pci_driver *pci_drv 
> __rte_unused,
>                   pci_dev->addr.devid != pci_addr.devid ||
>                   pci_dev->addr.function != pci_addr.function)
>                       continue;
> -             DRV_LOG(INFO, "PCI information matches, using device \"%s\"",
> +             DRV_LOG(INFO, "PCI information matches for device \"%s\"",
>                       ibv_list[ret]->name);
> -             break;
> +             ibv_match[n++] = ibv_list[ret];
> +     }
> +     ibv_match[n] = NULL;
> +     if (n > 1) {
> +             /*
> +              * The existence of several matching entries means port
> +              * representors have been instantiated. No existing Verbs
> +              * call nor /sys entries can tell them apart at this point.
> +              *
> +              * While definitely hackish, assume their names are numbered
> +              * based on order of creation with master device first,
> +              * followed by first port representor, followed by the
> +              * second one and so on.
> +              */
> +             DRV_LOG(WARNING,
> +                     "probing device with port representors involves"
> +                     " heuristics with uncertain outcome");
> +             qsort(ibv_match, n, sizeof(*ibv_match), mlx5_cmp_ibv_name);
> +             DRV_LOG(WARNING, "assuming \"%s\" is the master device",
> +                     ibv_match[0]->name);
> +             for (ret = 1; ret < n; ++ret)
> +                     DRV_LOG(WARNING,
> +                             "assuming \"%s\" is port representor #%d",
> +                             ibv_match[ret]->name, ret - 1);

This dump will appear every time a representor port is attached. How about
printing it only for the PF, and at DEBUG level?

>       }
> -     if (ret >= 0)
> -             eth_list = mlx5_dev_spawn(&pci_dev->device, ibv_list[ret], vf);
> +     if (n)
> +             eth_list = mlx5_dev_spawn(&pci_dev->device, ibv_match[0], vf);
>       mlx5_glue->free_device_list(ibv_list);
>       if (!eth_list || !*eth_list) {
>               DRV_LOG(WARNING,
>

Reply via email to