Introduce mlx5_nl_read_events() to read Netlink events (technically, messages) from a socket that was configured to listen for them via the new "groups" parameter of mlx5_nl_init(). Add the mlx5_nl_parse_link_status_update() helper to extract the interface index from link-related events. This patch is a shared base for later fixes.
Cc: sta...@dpdk.org Signed-off-by: Dmitry Kozlyuk <dkozl...@nvidia.com> Reviewed-by: Viacheslav Ovsiienko <viachesl...@nvidia.com> --- drivers/common/mlx5/linux/mlx5_common_os.c | 2 +- drivers/common/mlx5/linux/mlx5_nl.c | 102 ++++++++++++++++++++- drivers/common/mlx5/linux/mlx5_nl.h | 8 +- drivers/common/mlx5/version.map | 2 + drivers/net/mlx5/linux/mlx5_os.c | 8 +- drivers/net/mlx5/linux/mlx5_vlan_os.c | 2 +- 6 files changed, 116 insertions(+), 8 deletions(-) diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c index 0d3e24e04e..25e09bb55b 100644 --- a/drivers/common/mlx5/linux/mlx5_common_os.c +++ b/drivers/common/mlx5/linux/mlx5_common_os.c @@ -487,7 +487,7 @@ mlx5_os_get_ibv_device(const struct rte_pci_addr *addr) static int mlx5_nl_roce_disable(const char *addr) { - int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC); + int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC, 0); int devlink_id; int enable; int ret; diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c index fd4c2d2625..5d04857b38 100644 --- a/drivers/common/mlx5/linux/mlx5_nl.c +++ b/drivers/common/mlx5/linux/mlx5_nl.c @@ -185,19 +185,22 @@ uint32_t atomic_sn; * * @param protocol * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA). + * @param groups + * Groups to listen (e.g. RTMGRP_LINK), can be 0. * * @return * A file descriptor on success, a negative errno value otherwise and * rte_errno is set. */ int -mlx5_nl_init(int protocol) +mlx5_nl_init(int protocol, int groups) { int fd; int buf_size; socklen_t opt_size; struct sockaddr_nl local = { .nl_family = AF_NETLINK, + .nl_groups = groups, }; int ret; @@ -1862,3 +1865,100 @@ mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, /* Now, need to reload the driver. */ return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr); } + +/** + * Try to parse a Netlink message as a link status update. + * + * @param hdr + * Netlink message header. 
+ * @param[out] ifindex + * Index of the updated interface. + * + * @return + * 0 on success, negative on failure. + */ +int +mlx5_nl_parse_link_status_update(struct nlmsghdr *hdr, uint32_t *ifindex) +{ + struct ifinfomsg *info; + + switch (hdr->nlmsg_type) { + case RTM_NEWLINK: + case RTM_DELLINK: + case RTM_GETLINK: + case RTM_SETLINK: + info = NLMSG_DATA(hdr); + *ifindex = info->ifi_index; + return 0; + } + return -1; +} + +/** + * Read pending events from a Netlink socket. + * + * @param nlsk_fd + * Netlink socket. + * @param cb + * Callback invoked for each of the events. + * @param cb_arg + * User data for the callback. + * + * @return + * 0 on success, including the case when there are no events. + * Negative on failure and rte_errno is set. + */ +int +mlx5_nl_read_events(int nlsk_fd, mlx5_nl_event_cb *cb, void *cb_arg) +{ + char buf[8192]; + struct sockaddr_nl addr; + struct iovec iov = { + .iov_base = buf, + .iov_len = sizeof(buf), + }; + struct msghdr msg = { + .msg_name = &addr, + .msg_namelen = sizeof(addr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + struct nlmsghdr *hdr; + ssize_t size; + + while (1) { + size = recvmsg(nlsk_fd, &msg, MSG_DONTWAIT); + if (size < 0) { + if (errno == EAGAIN) + return 0; + if (errno == EINTR) + continue; + DRV_LOG(DEBUG, "Failed to receive netlink message: %s", + strerror(errno)); + rte_errno = errno; + return -rte_errno; + } + hdr = (struct nlmsghdr *)buf; + while (size >= (ssize_t)sizeof(*hdr)) { + ssize_t msg_len = hdr->nlmsg_len; + ssize_t data_len = msg_len - sizeof(*hdr); + ssize_t aligned_len; + + if (data_len < 0) { + DRV_LOG(DEBUG, "Netlink message too short"); + rte_errno = EINVAL; + return -rte_errno; + } + aligned_len = NLMSG_ALIGN(msg_len); + if (aligned_len > size) { + DRV_LOG(DEBUG, "Netlink message too long"); + rte_errno = EINVAL; + return -rte_errno; + } + cb(hdr, cb_arg); + hdr = RTE_PTR_ADD(hdr, aligned_len); + size -= aligned_len; + } + } + return 0; +} diff --git 
a/drivers/common/mlx5/linux/mlx5_nl.h b/drivers/common/mlx5/linux/mlx5_nl.h index 2063c0deeb..0b7552338a 100644 --- a/drivers/common/mlx5/linux/mlx5_nl.h +++ b/drivers/common/mlx5/linux/mlx5_nl.h @@ -11,6 +11,7 @@ #include "mlx5_common.h" +typedef void (mlx5_nl_event_cb)(struct nlmsghdr *hdr, void *user_data); /* VLAN netdev for VLAN workaround. */ struct mlx5_nl_vlan_dev { @@ -30,7 +31,7 @@ struct mlx5_nl_vlan_vmwa_context { }; __rte_internal -int mlx5_nl_init(int protocol); +int mlx5_nl_init(int protocol, int groups); __rte_internal int mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own, struct rte_ether_addr *mac, uint32_t index); @@ -75,4 +76,9 @@ int mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr, int mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, int enable); +__rte_internal +int mlx5_nl_read_events(int nlsk_fd, mlx5_nl_event_cb *cb, void *cb_arg); +__rte_internal +int mlx5_nl_parse_link_status_update(struct nlmsghdr *hdr, uint32_t *ifindex); + #endif /* RTE_PMD_MLX5_NL_H_ */ diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map index 462b7cea5e..d9b7ccacde 100644 --- a/drivers/common/mlx5/version.map +++ b/drivers/common/mlx5/version.map @@ -123,9 +123,11 @@ INTERNAL { mlx5_nl_mac_addr_flush; # WINDOWS_NO_EXPORT mlx5_nl_mac_addr_remove; # WINDOWS_NO_EXPORT mlx5_nl_mac_addr_sync; # WINDOWS_NO_EXPORT + mlx5_nl_parse_link_status_update; # WINDOWS_NO_EXPORT mlx5_nl_port_state; # WINDOWS_NO_EXPORT mlx5_nl_portnum; # WINDOWS_NO_EXPORT mlx5_nl_promisc; # WINDOWS_NO_EXPORT + mlx5_nl_read_events; # WINDOWS_NO_EXPORT mlx5_nl_switch_info; # WINDOWS_NO_EXPORT mlx5_nl_vf_mac_addr_modify; # WINDOWS_NO_EXPORT mlx5_nl_vlan_vmwa_create; # WINDOWS_NO_EXPORT diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index bbe05bb837..602473e8f7 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1086,7 +1086,7 @@ 
mlx5_dev_spawn(struct rte_device *dpdk_dev, " old OFED/rdma-core version or firmware configuration"); #endif config->mpls_en = mpls_en; - nl_rdma = mlx5_nl_init(NETLINK_RDMA); + nl_rdma = mlx5_nl_init(NETLINK_RDMA, 0); /* Check port status. */ if (spawn->phys_port <= UINT8_MAX) { /* Legacy Verbs api only support u8 port number. */ @@ -1133,7 +1133,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, priv->mtu = RTE_ETHER_MTU; /* Some internal functions rely on Netlink sockets, open them now. */ priv->nl_socket_rdma = nl_rdma; - priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE); + priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE, 0); priv->representor = !!switch_info->representor; priv->master = !!switch_info->master; priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; @@ -2130,8 +2130,8 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev, * matching ones, gathering into the list. */ struct ibv_device *ibv_match[ret + 1]; - int nl_route = mlx5_nl_init(NETLINK_ROUTE); - int nl_rdma = mlx5_nl_init(NETLINK_RDMA); + int nl_route = mlx5_nl_init(NETLINK_ROUTE, 0); + int nl_rdma = mlx5_nl_init(NETLINK_RDMA, 0); unsigned int i; while (ret-- > 0) { diff --git a/drivers/net/mlx5/linux/mlx5_vlan_os.c b/drivers/net/mlx5/linux/mlx5_vlan_os.c index 005904bdfe..7ee2460a23 100644 --- a/drivers/net/mlx5/linux/mlx5_vlan_os.c +++ b/drivers/net/mlx5/linux/mlx5_vlan_os.c @@ -136,7 +136,7 @@ mlx5_vlan_vmwa_init(struct rte_eth_dev *dev, uint32_t ifindex) return NULL; } rte_spinlock_init(&vmwa->sl); - vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE); + vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE, 0); if (vmwa->nl_socket < 0) { DRV_LOG(WARNING, "Can not create Netlink socket" -- 2.25.1