A callback parameter to process replies will be useful for subsequent work in this area. It implies the following:
- Replies may be much larger than requests. In fact their size cannot really be known in advance. Using MNL_SOCKET_BUFFER_SIZE (at least 8192 bytes) is the recommended approach to make truncation less likely (look for NLMSG_GOODSIZE in Linux).
- Multipart replies are made of several messages. A loop is needed to process these.
- In case of a truncated message (since one cannot really be sure), its remaining parts must be flushed to prevent their reception by subsequent queries.
- Using rte_get_tsc_cycles() instead of random() for message sequence numbers is faster yet unlikely to pick the same number twice in a row.
- mlx5_nl_flow_init() can be simplified since the query message is never written over (it was already the case actually).
Signed-off-by: Adrien Mazarguil <adrien.mazarg...@6wind.com>
--- drivers/net/mlx5/mlx5_nl_flow.c | 73 ++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 25 deletions(-) diff --git a/drivers/net/mlx5/mlx5_nl_flow.c b/drivers/net/mlx5/mlx5_nl_flow.c index 9ea2a1b55..e720728b7 100644 --- a/drivers/net/mlx5/mlx5_nl_flow.c +++ b/drivers/net/mlx5/mlx5_nl_flow.c @@ -22,6 +22,7 @@ #include <sys/socket.h> #include <rte_byteorder.h> +#include <rte_cycles.h> #include <rte_errno.h> #include <rte_ether.h> #include <rte_flow.h> @@ -1050,38 +1051,63 @@ mlx5_nl_flow_brand(void *buf, uint32_t handle) } /** - * Send Netlink message with acknowledgment. + * Send Netlink message with acknowledgment and process reply. * * @param nl * Libmnl socket to use. * @param nlh - * Message to send. This function always raises the NLM_F_ACK flag before - * sending. + * Message to send. This function always raises the NLM_F_ACK flag and + * sets its sequence number before sending. + * @param cb + * Callback handler for received message. + * @param arg + * Data pointer for callback handler. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ static int -mlx5_nl_flow_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh) +mlx5_nl_flow_chat(struct mnl_socket *nl, struct nlmsghdr *nlh, + mnl_cb_t cb, void *arg) { alignas(struct nlmsghdr) - uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) + - nlh->nlmsg_len - sizeof(*nlh)]; - uint32_t seq = random(); + uint8_t ans[MNL_SOCKET_BUFFER_SIZE]; + unsigned int portid = mnl_socket_get_portid(nl); + uint32_t seq = rte_get_tsc_cycles(); + int err = 0; int ret; nlh->nlmsg_flags |= NLM_F_ACK; nlh->nlmsg_seq = seq; ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len); - if (ret != -1) + nlh = (void *)ans; + /* + * The following loop postpones non-fatal errors until multipart + * messages are complete. + */ + while (ret > 0) { ret = mnl_socket_recvfrom(nl, ans, sizeof(ans)); - if (ret != -1) - ret = mnl_cb_run - (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL); - if (!ret) + if (ret == -1) { + err = errno; + if (err != ENOSPC) + break; + ret = sizeof(*nlh); + } + if (!err) { + ret = mnl_cb_run(nlh, ret, seq, portid, cb, arg); + if (ret < 0) + err = -ret; + } + if (!(nlh->nlmsg_flags & NLM_F_MULTI) || + nlh->nlmsg_type == NLMSG_DONE) + ret = -err; + else + ret = 1; + } + if (!err) return 0; - rte_errno = errno; - return -rte_errno; + rte_errno = err; + return -err; } /** @@ -1105,7 +1131,7 @@ mlx5_nl_flow_create(struct mnl_socket *nl, void *buf, nlh->nlmsg_type = RTM_NEWTFILTER; nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; - if (!mlx5_nl_flow_nl_ack(nl, nlh)) + if (!mlx5_nl_flow_chat(nl, nlh, NULL, NULL)) return 0; return rte_flow_error_set (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -1133,7 +1159,7 @@ mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf, nlh->nlmsg_type = RTM_DELTFILTER; nlh->nlmsg_flags = NLM_F_REQUEST; - if (!mlx5_nl_flow_nl_ack(nl, nlh)) + if (!mlx5_nl_flow_chat(nl, nlh, NULL, NULL)) return 0; return rte_flow_error_set (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -1171,23 +1197,20 @@ 
mlx5_nl_flow_ifindex_init(struct mnl_socket *nl, unsigned int ifindex, tcm->tcm_ifindex = ifindex; tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0); tcm->tcm_parent = TC_H_INGRESS; + if (!mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress")) + return rte_flow_error_set + (error, ENOBUFS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "netlink: not enough space for message"); /* Ignore errors when qdisc is already absent. */ - if (mlx5_nl_flow_nl_ack(nl, nlh) && + if (mlx5_nl_flow_chat(nl, nlh, NULL, NULL) && rte_errno != EINVAL && rte_errno != ENOENT) return rte_flow_error_set (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "netlink: failed to remove ingress qdisc"); /* Create fresh ingress qdisc. */ - nlh = mnl_nlmsg_put_header(buf); nlh->nlmsg_type = RTM_NEWQDISC; nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; - tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm)); - tcm->tcm_family = AF_UNSPEC; - tcm->tcm_ifindex = ifindex; - tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0); - tcm->tcm_parent = TC_H_INGRESS; - mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress"); - if (mlx5_nl_flow_nl_ack(nl, nlh)) + if (mlx5_nl_flow_chat(nl, nlh, NULL, NULL)) return rte_flow_error_set (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "netlink: failed to create ingress qdisc"); -- 2.11.0