In socket direct mode, it's possible to bind any two (maybe four in future) PCIe devices with IDs like xxxx:xx:xx.x and yyyy:yy:yy.y. Bonding member interfaces are unnecessary to have the same PCIe domain/bus/device ID anymore,
Kernel driver uses "system_image_guid" to identify if devices can be bound together or not. Sysfs "phys_switch_id" is used to get "system_image_guid" of each network interface. OFED 5.4+ is required to support "phys_switch_id". Signed-off-by: Rongwei Liu <rongw...@nvidia.com> Acked-by: Viacheslav Ovsiienko <viachesl...@nvidia.com> --- doc/guides/nics/mlx5.rst | 4 +++ doc/guides/rel_notes/release_21_11.rst | 4 +++ drivers/net/mlx5/linux/mlx5_os.c | 43 ++++++++++++++++++++------ 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index bae73f42d8..b58236e00a 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -464,6 +464,10 @@ Limitations - In order to achieve best insertion rate, application should manage the flows per lcore. - Better to disable memory reclaim by setting ``reclaim_mem_mode`` to 0 to accelerate the flow object allocation and release with cache. +- Bonding under socket direct mode + + - Needs OFED 5.4+. + Statistics ---------- diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst index dfc2cbdeed..2a6cc765c2 100644 --- a/doc/guides/rel_notes/release_21_11.rst +++ b/doc/guides/rel_notes/release_21_11.rst @@ -106,6 +106,10 @@ New Features * Added DES-CBC, AES-XCBC-MAC, AES-CMAC and non-HMAC algo support. * Added PDCP short MAC-I support. +* **Updated Mellanox mlx5 driver.** + + * Added socket direct mode bonding support. + * **Updated NXP dpaa2_sec crypto PMD.** * Added PDCP short MAC-I support. diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 3746057673..1d57b934fc 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -2008,6 +2008,8 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, FILE *bond_file = NULL, *file; int pf = -1; int ret; + uint8_t cur_guid[32] = {0}; + uint8_t guid[32] = {0}; /* * Try to get master device name. If something goes @@ -2022,6 +2024,8 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, np = mlx5_nl_portnum(nl_rdma, ibv_dev->name); if (!np) return -1; + if (mlx5_get_device_guid(pci_dev, cur_guid, sizeof(cur_guid)) < 0) + return -1; /* * The Master device might not be on the predefined * port (not on port index 1, it is not garanted), @@ -2050,6 +2054,7 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, char tmp_str[IF_NAMESIZE + 32]; struct rte_pci_addr pci_addr; struct mlx5_switch_info info; + int ret; /* Process slave interface names in the loop. */ snprintf(tmp_str, sizeof(tmp_str), @@ -2080,15 +2085,6 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, tmp_str); break; } - /* Match PCI address, allows BDF0+pfx or BDFx+pfx. */ - if (pci_dev->domain == pci_addr.domain && - pci_dev->bus == pci_addr.bus && - pci_dev->devid == pci_addr.devid && - ((pci_dev->function == 0 && - pci_dev->function + owner == pci_addr.function) || - (pci_dev->function == owner && - pci_addr.function == owner))) - pf = info.port_name; /* Get ifindex. */ snprintf(tmp_str, sizeof(tmp_str), "/sys/class/net/%s/ifindex", ifname); @@ -2105,6 +2101,30 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, bond_info->ports[info.port_name].pci_addr = pci_addr; bond_info->ports[info.port_name].ifindex = ifindex; bond_info->n_port++; + /* + * Under socket direct mode, bonding will use + * system_image_guid as identification. + * After OFED 5.4, guid is readable (ret >= 0) under sysfs. + * All bonding members should have the same guid even if driver + * is using PCIe BDF. + */ + ret = mlx5_get_device_guid(&pci_addr, guid, sizeof(guid)); + if (ret < 0) + break; + else if (ret > 0) { + if (!memcmp(guid, cur_guid, sizeof(guid)) && + owner == info.port_name && + (owner != 0 || (owner == 0 && + !rte_pci_addr_cmp(pci_dev, &pci_addr)))) + pf = info.port_name; + } else if (pci_dev->domain == pci_addr.domain && + pci_dev->bus == pci_addr.bus && + pci_dev->devid == pci_addr.devid && + ((pci_dev->function == 0 && + pci_dev->function + owner == pci_addr.function) || + (pci_dev->function == owner && + pci_addr.function == owner))) + pf = info.port_name; } if (pf >= 0) { /* Get bond interface info */ @@ -2117,6 +2137,11 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, DRV_LOG(INFO, "PF device %u, bond device %u(%s)", ifindex, bond_info->ifindex, bond_info->ifname); } + if (owner == 0 && pf != 0) { + DRV_LOG(INFO, "PCIe instance %04x:%02x:%02x.%x isn't bonding owner", + pci_dev->domain, pci_dev->bus, pci_dev->devid, + pci_dev->function); + } return pf; } -- 2.27.0