This provides support for the VXLAN_DECAP action. Outer tunnel properties are specified as the initial part of the flow rule pattern (up to and including VXLAN item), optionally followed by inner traffic properties.
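The first testpmd example below maps to the public rte_flow API roughly as
follows. This is a minimal sketch, not part of the patch: the helper name
(vxlan_decap_flow) and hard-coded port IDs are illustrative, error handling
is elided, and the outer L2/L3/L4 specs matched by the testpmd command are
left empty for brevity. Note the default VXLAN item mask already covers the
entire VNI, which this implementation requires:

  #include <rte_flow.h>

  static struct rte_flow *
  vxlan_decap_flow(uint16_t port_id) /* hypothetical helper */
  {
          /* Switch-level rule, hence the transfer attribute. */
          struct rte_flow_attr attr = { .ingress = 1, .transfer = 1 };
          /* VNI 0x112233 as in the example; default mask covers it fully. */
          struct rte_flow_item_vxlan vxlan = { .vni = { 0x11, 0x22, 0x33 } };
          struct rte_flow_action_port_id dest = { .id = 2 };
          struct rte_flow_item pattern[] = {
                  { .type = RTE_FLOW_ITEM_TYPE_ETH },  /* outer Ethernet */
                  { .type = RTE_FLOW_ITEM_TYPE_IPV4 }, /* outer IPv4 */
                  { .type = RTE_FLOW_ITEM_TYPE_UDP },  /* outer UDP */
                  { .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan },
                  { .type = RTE_FLOW_ITEM_TYPE_END },
          };
          struct rte_flow_action actions[] = {
                  { .type = RTE_FLOW_ACTION_TYPE_VXLAN_DECAP },
                  { .type = RTE_FLOW_ACTION_TYPE_PORT_ID, .conf = &dest },
                  { .type = RTE_FLOW_ACTION_TYPE_END },
          };
          struct rte_flow_error error;

          return rte_flow_create(port_id, &attr, pattern, actions, &error);
  }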
Testpmd examples:

- Creating a flow on port ID 1 performing VXLAN decapsulation and directing
  the result to port ID 2 without checking inner properties:

  flow create 1 ingress transfer pattern eth src is 66:77:88:99:aa:bb dst
     is 00:11:22:33:44:55 / ipv4 src is 2.2.2.2 dst is 1.1.1.1 / udp src
     is 4789 dst is 4242 / vxlan vni is 0x112233 / end actions vxlan_decap
     / port_id id 2 / end

- Same as above except only inner TCPv6 packets with destination port 42
  will be let through:

  flow create 1 ingress transfer pattern eth src is 66:77:88:99:aa:bb dst
     is 00:11:22:33:44:55 / ipv4 src is 2.2.2.2 dst is 1.1.1.1 / udp src
     is 4789 dst is 4242 / vxlan vni is 0x112233 / eth / ipv6 / tcp dst
     is 42 / end actions vxlan_decap / port_id id 2 / end

Signed-off-by: Adrien Mazarguil <adrien.mazarg...@6wind.com>
---
 drivers/net/mlx5/Makefile       |  65 +++++++
 drivers/net/mlx5/mlx5_nl_flow.c | 344 ++++++++++++++++++++++++++++++++---
 2 files changed, 379 insertions(+), 30 deletions(-)

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 1ba4ce612..85672abd6 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -335,6 +335,71 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		enum TCA_FLOWER_KEY_VLAN_ETH_TYPE \
 		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
+		HAVE_TCA_FLOWER_KEY_ENC_KEY_ID \
+		linux/pkt_cls.h \
+		enum TCA_FLOWER_KEY_ENC_KEY_ID \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC \
+		linux/pkt_cls.h \
+		enum TCA_FLOWER_KEY_ENC_IPV4_SRC \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK \
+		linux/pkt_cls.h \
+		enum TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST \
+		linux/pkt_cls.h \
+		enum TCA_FLOWER_KEY_ENC_IPV4_DST \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK \
+		linux/pkt_cls.h \
+		enum TCA_FLOWER_KEY_ENC_IPV4_DST_MASK \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC \
+		linux/pkt_cls.h \
+		enum TCA_FLOWER_KEY_ENC_IPV6_SRC \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK \
+		linux/pkt_cls.h \
+		enum TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST \
+		linux/pkt_cls.h \
+		enum TCA_FLOWER_KEY_ENC_IPV6_DST \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK \
+		linux/pkt_cls.h \
+		enum TCA_FLOWER_KEY_ENC_IPV6_DST_MASK \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT \
+		linux/pkt_cls.h \
+		enum TCA_FLOWER_KEY_ENC_UDP_SRC_PORT \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK \
+		linux/pkt_cls.h \
+		enum TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT \
+		linux/pkt_cls.h \
+		enum TCA_FLOWER_KEY_ENC_UDP_DST_PORT \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK \
+		linux/pkt_cls.h \
+		enum TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
 		HAVE_TC_ACT_VLAN \
 		linux/tc_act/tc_vlan.h \
 		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
diff --git a/drivers/net/mlx5/mlx5_nl_flow.c b/drivers/net/mlx5/mlx5_nl_flow.c
index 672f92863..12802796a 100644
--- a/drivers/net/mlx5/mlx5_nl_flow.c
+++ b/drivers/net/mlx5/mlx5_nl_flow.c
@@ -201,6 +201,45 @@ struct tc_tunnel_key {
 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
 #endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_KEY_ID
+#define TCA_FLOWER_KEY_ENC_KEY_ID 26
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC
+#define TCA_FLOWER_KEY_ENC_IPV4_SRC 27
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
+#define TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK 28
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST
+#define TCA_FLOWER_KEY_ENC_IPV4_DST 29
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK
+#define TCA_FLOWER_KEY_ENC_IPV4_DST_MASK 30
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC
+#define TCA_FLOWER_KEY_ENC_IPV6_SRC 31
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
+#define TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK 32
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST
+#define TCA_FLOWER_KEY_ENC_IPV6_DST 33
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK
+#define TCA_FLOWER_KEY_ENC_IPV6_DST_MASK 34
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT
+#define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT 43
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK
+#define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK 44
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT
+#define TCA_FLOWER_KEY_ENC_UDP_DST_PORT 45
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK
+#define TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK 46
+#endif
 
 #define BIT(b) (1 << (b))
 #define BIT_ENCAP(e) BIT(MLX5_NL_FLOW_ENCAP_ ## e)
@@ -278,6 +317,7 @@ struct mlx5_nl_flow_ctx {
 struct mlx5_nl_flow {
 	uint32_t size; /**< Size of this object. */
 	uint32_t applied:1; /**< Whether rule is currently applied. */
+	uint32_t decap:1; /**< Decapsulate @p encap. */
 	unsigned int encap_ifindex; /**< Interface to use with @p encap. */
 	unsigned int *ifindex_src; /**< Source interface. */
 	unsigned int *ifindex_dst; /**< Destination interface. */
@@ -301,6 +341,11 @@ enum mlx5_nl_flow_trans {
 	ITEM_TCP,
 	ITEM_UDP,
 	ITEM_VXLAN,
+	ITEM_VXLAN_END,
+	ITEM_TUN_ETH,
+	ITEM_TUN_IPV4,
+	ITEM_TUN_IPV6,
+	ITEM_TUN_UDP,
 	ACTIONS,
 	ACTION_VOID,
 	ACTION_PORT_ID,
@@ -339,7 +384,12 @@ static const enum mlx5_nl_flow_trans *const mlx5_nl_flow_trans[] = {
 	[ITEM_IPV6] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
 	[ITEM_TCP] = TRANS(PATTERN_COMMON),
 	[ITEM_UDP] = TRANS(ITEM_VXLAN, PATTERN_COMMON),
-	[ITEM_VXLAN] = TRANS(PATTERN_COMMON),
+	[ITEM_VXLAN] = TRANS(ITEM_TUN_ETH, PATTERN_COMMON),
+	[ITEM_VXLAN_END] = TRANS(ITEM_ETH, PATTERN_COMMON),
+	[ITEM_TUN_ETH] = TRANS(ITEM_TUN_IPV4, ITEM_TUN_IPV6, PATTERN_COMMON),
+	[ITEM_TUN_IPV4] = TRANS(ITEM_TUN_UDP, PATTERN_COMMON),
+	[ITEM_TUN_IPV6] = TRANS(ITEM_TUN_UDP, PATTERN_COMMON),
+	[ITEM_TUN_UDP] = TRANS(ITEM_VXLAN_END, ITEM_VOID, ITEM_PORT_ID),
 	[ACTIONS] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
 	[ACTION_VOID] = TRANS(BACK),
 	[ACTION_PORT_ID] = TRANS(ACTION_VOID, END),
@@ -805,6 +855,7 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
 	bool vlan_present;
 	bool vlan_eth_type_set;
 	bool ip_proto_set;
+	bool vxlan_decap;
 	struct mlx5_nl_flow_encap encap;
 	struct nlattr *na_flower;
 	struct nlattr *na_flower_act;
@@ -819,6 +870,7 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
 		goto error_nobufs;
 	nl_flow->size = offsetof(struct mlx5_nl_flow, msg);
 	nl_flow->applied = 0;
+	nl_flow->decap = 0;
 	nl_flow->encap_ifindex = 0;
 	nl_flow->ifindex_src = NULL;
 	nl_flow->ifindex_dst = NULL;
@@ -833,6 +885,7 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
 	vlan_present = false;
 	vlan_eth_type_set = false;
 	ip_proto_set = false;
+	vxlan_decap = false;
 	memset(&encap, 0, sizeof(encap));
 	na_flower = NULL;
 	na_flower_act = NULL;
@@ -850,6 +903,7 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
 		const struct rte_flow_item_ipv6 *ipv6;
 		const struct rte_flow_item_tcp *tcp;
 		const struct rte_flow_item_udp *udp;
+		const struct rte_flow_item_vxlan *vxlan;
 	} spec, mask;
 	union {
 		const struct rte_flow_action_port_id *port_id;
@@ -943,9 +997,6 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
 		na_flower = mnl_attr_nest_start_check(buf, size, TCA_OPTIONS);
 		if (!na_flower)
 			goto error_nobufs;
-		if (!mnl_attr_put_u32_check(buf, size, TCA_FLOWER_FLAGS,
-					    TCA_CLS_FLAGS_SKIP_SW))
-			goto error_nobufs;
 		break;
 	case ITEM_VOID:
 		if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
@@ -1286,16 +1337,215 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
 		++item;
 		break;
 	case ITEM_VXLAN:
+	case ITEM_VXLAN_END:
 		if (item->type != RTE_FLOW_ITEM_TYPE_VXLAN)
 			goto trans;
-		return rte_flow_error_set
-			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
-			 "VXLAN header matching is not supported yet");
+		if (vxlan_decap) {
+			/* Done with outer, continue with inner. */
+			++item;
+			break;
+		}
+		if (encap.mask)
+			return rte_flow_error_set
+				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+				 item, "no support for stacked encapsulation");
+		mask.vxlan = mlx5_nl_flow_item_mask
+			(item, &rte_flow_item_vxlan_mask,
+			 &mlx5_nl_flow_encap_mask_supported.vxlan,
+			 &mlx5_nl_flow_mask_empty.vxlan,
+			 sizeof(rte_flow_item_vxlan_mask), error);
+		if (!mask.vxlan)
+			return -rte_errno;
+		spec.vxlan = item->spec;
+		/*
+		 * No TCA_FLOWER_* to match VXLAN traffic. This can only be
+		 * done indirectly through ACTION_VXLAN_DECAP.
+		 *
+		 * Since tunnel encapsulation information must be collected
+		 * from the previous pattern items, the message built so far
+		 * must be discarded, inner traffic will be matched by
+		 * subsequent pattern items.
+		 *
+		 * Reset inner context and process pattern again through a
+		 * different path.
+		 */
+		eth_type_set = false;
+		vlan_present = false;
+		vlan_eth_type_set = false;
+		ip_proto_set = false;
+		nlh = buf;
+		mnl_attr_nest_cancel(nlh, na_flower);
+		na_flower = mnl_attr_nest_start_check(buf, size, TCA_OPTIONS);
+		if (!na_flower)
+			goto error_nobufs;
+		if (memcmp(mask.vxlan->vni, VXLAN_VNI_MASK, 3))
+			return rte_flow_error_set
+				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+				 mask.vxlan,
+				 "VXLAN VNI is either incomplete or missing");
+		if (!mnl_attr_put_u32_check(buf, size,
+					    TCA_FLOWER_KEY_ENC_KEY_ID,
+					    vxlan_vni_as_be32(spec.vxlan->vni)))
+			goto error_nobufs;
+		encap.vxlan.vni = vxlan_vni_as_be32(spec.vxlan->vni);
+		encap.mask |= BIT_ENCAP(VXLAN_VNI);
+		vxlan_decap = true;
+		item = pattern;
+		break;
+	case ITEM_TUN_ETH:
+		if (item->type != RTE_FLOW_ITEM_TYPE_ETH)
+			goto trans;
+		mask.eth = mlx5_nl_flow_item_mask
+			(item, &rte_flow_item_eth_mask,
+			 &mlx5_nl_flow_encap_mask_supported.eth,
+			 &mlx5_nl_flow_mask_empty.eth,
+			 sizeof(rte_flow_item_eth_mask), error);
+		if (!mask.eth)
+			return -rte_errno;
+		spec.eth = item->spec;
+		if ((!is_zero_ether_addr(&mask.eth->dst) ||
+		     !is_zero_ether_addr(&mask.eth->src)) &&
+		    nl_flow != (void *)buf_tmp)
+			DRV_LOG(WARNING,
+				"Ethernet source/destination addresses cannot"
+				" be matched along with VXLAN traffic;"
+				" parameters ignored");
+		/* Source and destination are swapped for decap. */
+		if (is_broadcast_ether_addr(&mask.eth->dst)) {
+			encap.eth.src = spec.eth->dst;
+			encap.mask |= BIT_ENCAP(ETH_SRC);
+		}
+		if (is_broadcast_ether_addr(&mask.eth->src)) {
+			encap.eth.dst = spec.eth->src;
+			encap.mask |= BIT_ENCAP(ETH_DST);
+		}
+		++item;
+		break;
+	case ITEM_TUN_IPV4:
+		if (item->type != RTE_FLOW_ITEM_TYPE_IPV4)
+			goto trans;
+		mask.ipv4 = mlx5_nl_flow_item_mask
+			(item, &rte_flow_item_ipv4_mask,
+			 &mlx5_nl_flow_encap_mask_supported.ipv4,
+			 &mlx5_nl_flow_mask_empty.ipv4,
+			 sizeof(rte_flow_item_ipv4_mask), error);
+		if (!mask.ipv4)
+			return -rte_errno;
+		spec.ipv4 = item->spec;
+		if ((mask.ipv4->hdr.src_addr &&
+		     (!mnl_attr_put_u32_check(buf, size,
+					      TCA_FLOWER_KEY_ENC_IPV4_SRC,
+					      spec.ipv4->hdr.src_addr) ||
+		      !mnl_attr_put_u32_check(buf, size,
+					      TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
+					      mask.ipv4->hdr.src_addr))) ||
+		    (mask.ipv4->hdr.dst_addr &&
+		     (!mnl_attr_put_u32_check(buf, size,
+					      TCA_FLOWER_KEY_ENC_IPV4_DST,
+					      spec.ipv4->hdr.dst_addr) ||
+		      !mnl_attr_put_u32_check(buf, size,
+					      TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
+					      mask.ipv4->hdr.dst_addr))))
+			goto error_nobufs;
+		/* Source and destination are swapped for decap. */
+		if (mask.ipv4->hdr.src_addr == IN_ADDR_MASK) {
+			encap.ip.dst.v4.s_addr = spec.ipv4->hdr.src_addr;
+			encap.mask |= BIT_ENCAP(IPV4_DST);
+		}
+		if (mask.ipv4->hdr.dst_addr == IN_ADDR_MASK) {
+			encap.ip.src.v4.s_addr = spec.ipv4->hdr.dst_addr;
+			encap.mask |= BIT_ENCAP(IPV4_SRC);
+		}
+		++item;
+		break;
+	case ITEM_TUN_IPV6:
+		if (item->type != RTE_FLOW_ITEM_TYPE_IPV6)
+			goto trans;
+		mask.ipv6 = mlx5_nl_flow_item_mask
+			(item, &rte_flow_item_ipv6_mask,
+			 &mlx5_nl_flow_encap_mask_supported.ipv6,
+			 &mlx5_nl_flow_mask_empty.ipv6,
+			 sizeof(rte_flow_item_ipv6_mask), error);
+		if (!mask.ipv6)
+			return -rte_errno;
+		spec.ipv6 = item->spec;
+		if ((!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr) &&
+		     (!mnl_attr_put_check(buf, size,
+					  TCA_FLOWER_KEY_ENC_IPV6_SRC,
+					  sizeof(spec.ipv6->hdr.src_addr),
+					  spec.ipv6->hdr.src_addr) ||
+		      !mnl_attr_put_check(buf, size,
+					  TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
+					  sizeof(mask.ipv6->hdr.src_addr),
+					  mask.ipv6->hdr.src_addr))) ||
+		    (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr) &&
+		     (!mnl_attr_put_check(buf, size,
+					  TCA_FLOWER_KEY_ENC_IPV6_DST,
+					  sizeof(spec.ipv6->hdr.dst_addr),
+					  spec.ipv6->hdr.dst_addr) ||
+		      !mnl_attr_put_check(buf, size,
+					  TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
+					  sizeof(mask.ipv6->hdr.dst_addr),
+					  mask.ipv6->hdr.dst_addr))))
+			goto error_nobufs;
+		/* Source and destination are swapped for decap. */
+		if (!memcmp(mask.ipv6->hdr.src_addr, IN6_ADDR_MASK, 16)) {
+			encap.ip.dst.v6 =
+				*(struct in6_addr *)&spec.ipv6->hdr.src_addr;
+			encap.mask |= BIT_ENCAP(IPV6_DST);
+		}
+		if (!memcmp(mask.ipv6->hdr.dst_addr, IN6_ADDR_MASK, 16)) {
+			encap.ip.src.v6 =
+				*(struct in6_addr *)&spec.ipv6->hdr.dst_addr;
+			encap.mask |= BIT_ENCAP(IPV6_SRC);
+		}
+		++item;
+		break;
+	case ITEM_TUN_UDP:
+		if (item->type != RTE_FLOW_ITEM_TYPE_UDP)
+			goto trans;
+		mask.udp = mlx5_nl_flow_item_mask
+			(item, &rte_flow_item_udp_mask,
+			 &mlx5_nl_flow_encap_mask_supported.udp,
+			 &mlx5_nl_flow_mask_empty.udp,
+			 sizeof(rte_flow_item_udp_mask), error);
+		if (!mask.udp)
+			return -rte_errno;
+		spec.udp = item->spec;
+		if ((mask.udp->hdr.src_port &&
+		     (!mnl_attr_put_u16_check(buf, size,
+					      TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
+					      spec.udp->hdr.src_port) ||
+		      !mnl_attr_put_u16_check
+		      (buf, size, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
+		       mask.udp->hdr.src_port))) ||
+		    (mask.udp->hdr.dst_port &&
+		     (!mnl_attr_put_u16_check(buf, size,
+					      TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
+					      spec.udp->hdr.dst_port) ||
+		      !mnl_attr_put_u16_check
+		      (buf, size, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
+		       mask.udp->hdr.dst_port))))
+			goto error_nobufs;
+		/* Source and destination are swapped for decap. */
+		if (mask.udp->hdr.src_port == BE16_MASK) {
+			encap.udp.dst = spec.udp->hdr.src_port;
+			encap.mask |= BIT_ENCAP(UDP_DST);
+		}
+		if (mask.udp->hdr.dst_port == BE16_MASK) {
+			encap.udp.src = spec.udp->hdr.dst_port;
+			encap.mask |= BIT_ENCAP(UDP_SRC);
+		}
+		++item;
+		break;
 	case ACTIONS:
 		if (item->type != RTE_FLOW_ITEM_TYPE_END)
 			goto trans;
 		assert(na_flower);
 		assert(!na_flower_act);
+		if (!mnl_attr_put_u32_check(buf, size, TCA_FLOWER_FLAGS,
+					    TCA_CLS_FLAGS_SKIP_SW))
+			goto error_nobufs;
 		na_flower_act = mnl_attr_nest_start_check(buf, size,
 							  TCA_FLOWER_ACT);
 		if (!na_flower_act)
@@ -1446,14 +1696,35 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
 		}
 		++action;
 		break;
+	case ACTION_VXLAN_DECAP:
+		if (action->type != RTE_FLOW_ACTION_TYPE_VXLAN_DECAP)
+			goto trans;
+		if (!vxlan_decap)
+			return rte_flow_error_set
+				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+				 action,
+				 "VXLAN decapsulation is only supported after"
+				 " matching VXLAN traffic explicitly first");
+		i = TCA_TUNNEL_KEY_ACT_RELEASE;
+		nl_flow->decap = 1;
+		conf.vxlan_encap = NULL;
+		goto vxlan_encap;
 	case ACTION_VXLAN_ENCAP:
 		if (action->type != RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP)
 			goto trans;
+		if (vxlan_decap)
+			return rte_flow_error_set
+				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+				 action,
+				 "cannot combine VXLAN header matching with"
+				 " encapsulation");
 		conf.vxlan_encap = action->conf;
 		if (mlx5_nl_flow_encap_reap(&encap,
 					    conf.vxlan_encap->definition,
 					    error))
 			return -rte_errno;
+		i = TCA_TUNNEL_KEY_ACT_SET;
+vxlan_encap:
 		act_index =
 			mnl_attr_nest_start_check(buf, size, act_index_cur++);
 		if (!act_index ||
@@ -1467,10 +1738,11 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
 			 sizeof(struct tc_tunnel_key),
 			 &(struct tc_tunnel_key){
 				.action = TC_ACT_PIPE,
-				.t_action =
-					TCA_TUNNEL_KEY_ACT_SET,
+				.t_action = i,
 			 }))
 			goto error_nobufs;
+		if (!conf.vxlan_encap)
+			goto vxlan_encap_end;
 		if (encap.mask & BIT_ENCAP(IPV4_SRC) &&
 		    !mnl_attr_put_u32_check
 		    (buf, size, TCA_TUNNEL_KEY_ENC_IPV4_SRC,
@@ -1507,16 +1779,11 @@ mlx5_nl_flow_transpose(struct mlx5_nl_flow *nl_flow,
 		if (!mnl_attr_put_u32_check
 		    (buf, size, TCA_TUNNEL_KEY_ENC_KEY_ID, encap.vxlan.vni))
 			goto error_nobufs;
+vxlan_encap_end:
 		mnl_attr_nest_end(buf, act);
 		mnl_attr_nest_end(buf, act_index);
 		++action;
 		break;
-	case ACTION_VXLAN_DECAP:
-		if (action->type != RTE_FLOW_ACTION_TYPE_VXLAN_DECAP)
-			goto trans;
-		return rte_flow_error_set
-			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, action,
-			 "VXLAN decap is not supported yet");
 	case END:
 		if (item->type != RTE_FLOW_ITEM_TYPE_END ||
 		    action->type != RTE_FLOW_ACTION_TYPE_END)
@@ -1844,15 +2111,26 @@ mlx5_nl_flow_ifindex_vxlan(struct mlx5_nl_flow_ctx *ctx, unsigned int ifindex,
 	 * cannot be worked around by picking a random value here and using
 	 * a different one when creating flow rules later.
 	 *
-	 * Therefore request a hopefully unique VNI based on the interface
-	 * index in order to work around EEXIST. VNI will be overridden
-	 * later on a flow rule basis thanks to IFLA_VXLAN_COLLECT_METADATA.
+	 * There is another way to work around EEXIST by assigning a unique
+	 * VNI to the VXLAN interface (e.g. by emitting IFLA_VXLAN_ID based
+	 * on underlying ifindex), however doing so breaks decap as it
+	 * prevents the kernel from matching VNI when looking for a VXLAN
+	 * interface in that direction. Note that iproute2 doesn't allow
+	 * this combination either.
+	 *
+	 * Creating non-external VXLAN interfaces with fixed outer
+	 * properties was also considered. Problem is that not only it won't
+	 * scale to large numbers, it appears that only interfaces with
+	 * dynamic properties (external) can be offloaded to hardware.
+	 *
+	 * Hence the following limitation: as long as VXLAN encap/decap flow
+	 * rules exist on a given DPDK port, the local UDP port they rely on
+	 * can only be used by flow rules on that port. They will fail with
+	 * EEXIST on others.
 	 */
 	if (!mnl_attr_put_u16_check(nlh, sizeof(buf), IFLA_VXLAN_PORT,
 				    vxlan_port))
 		goto exit;
-	if (!mnl_attr_put_u32_check(nlh, sizeof(buf), IFLA_VXLAN_ID, ifindex))
-		goto exit;
 	mnl_attr_nest_end(nlh, na_vxlan);
 	mnl_attr_nest_end(nlh, na_info);
 	ret = mlx5_nl_flow_chat(ctx, nlh, NULL, NULL);
@@ -2022,8 +2300,9 @@ mlx5_nl_flow_encap_neigh(struct mlx5_nl_flow_ctx *ctx,
 		goto error_nobufs;
 	if (encap->mask & BIT_ENCAP(ETH_SRC) && enable)
 		DRV_LOG(WARNING,
-			"Ethernet source address cannot be forced"
-			" for VXLAN encap; parameter ignored");
+			"Ethernet source address (encap) or destination"
+			" address (decap) cannot be forced for VXLAN"
+			" encap/decap; parameter ignored");
 	if (encap->mask & BIT_ENCAP(ETH_DST) &&
 	    !mnl_attr_put_check(nlh, sizeof(buf), NDA_LLADDR,
 				sizeof(encap->eth.dst), &encap->eth.dst))
@@ -2325,9 +2604,12 @@ mlx5_nl_flow_create(struct mlx5_nl_flow_ctx *ctx, struct mlx5_nl_flow *nl_flow,
 {
 	struct nlmsghdr *nlh = (void *)nl_flow->msg;
 	struct mlx5_nl_flow_encap *encap =
-		nl_flow->encap && nl_flow->ifindex_dst ?
+		nl_flow->encap && nl_flow->ifindex_dst && nl_flow->ifindex_src ?
 		nl_flow->encap : NULL;
-	unsigned int ifindex = encap ? *nl_flow->ifindex_dst : 0;
+	unsigned int *ifindex_target =
+		nl_flow->decap ?
+		nl_flow->ifindex_src : nl_flow->ifindex_dst;
+	unsigned int ifindex = encap ? *ifindex_target : 0;
 	int ret;
 
 	if (nl_flow->applied)
@@ -2339,11 +2621,11 @@ mlx5_nl_flow_create(struct mlx5_nl_flow_ctx *ctx, struct mlx5_nl_flow *nl_flow,
 			(ctx, encap, ifindex, true, error);
 		if (!nl_flow->encap_ifindex)
 			return -rte_errno;
-		*nl_flow->ifindex_dst = nl_flow->encap_ifindex;
+		*ifindex_target = nl_flow->encap_ifindex;
 	}
 	ret = mlx5_nl_flow_chat(ctx, nlh, NULL, NULL);
 	if (encap)
-		*nl_flow->ifindex_dst = ifindex;
+		*ifindex_target = ifindex;
 	if (!ret) {
 		nl_flow->applied = 1;
 		return 0;
@@ -2378,9 +2660,11 @@ mlx5_nl_flow_destroy(struct mlx5_nl_flow_ctx *ctx, struct mlx5_nl_flow *nl_flow,
 {
 	struct nlmsghdr *nlh = (void *)nl_flow->msg;
 	struct mlx5_nl_flow_encap *encap =
-		nl_flow->encap && nl_flow->ifindex_dst ?
+		nl_flow->encap && nl_flow->ifindex_dst && nl_flow->ifindex_src ?
 		nl_flow->encap : NULL;
-	unsigned int ifindex = encap ? *nl_flow->ifindex_dst : 0;
+	unsigned int *ifindex_target =
+		nl_flow->decap ? nl_flow->ifindex_src : nl_flow->ifindex_dst;
+	unsigned int ifindex = encap ? *ifindex_target : 0;
 	int err = 0;
 	int ret;
@@ -2392,11 +2676,11 @@ mlx5_nl_flow_destroy(struct mlx5_nl_flow_ctx *ctx, struct mlx5_nl_flow *nl_flow,
 		if (!mlx5_nl_flow_encap_ifindex
 		    (ctx, encap, ifindex, false, error))
 			err = rte_errno;
-		*nl_flow->ifindex_dst = nl_flow->encap_ifindex;
+		*ifindex_target = nl_flow->encap_ifindex;
 	}
 	ret = mlx5_nl_flow_chat(ctx, nlh, NULL, NULL);
 	if (encap)
-		*nl_flow->ifindex_dst = ifindex;
+		*ifindex_target = ifindex;
 	nl_flow->applied = 0;
 	if (err) {
 		rte_errno = err;
-- 
2.11.0