Add support for MPLS over GRE and MPLS over UDP tunnel types as
described in the following RFCs:

1. https://tools.ietf.org/html/rfc4023
2. https://tools.ietf.org/html/rfc7510
3. https://tools.ietf.org/html/rfc4385
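As a usage sketch (not part of this patch, all port, queue and label
values are illustrative): once the PMD exposes the MPLS item, an
application can steer MPLS-over-UDP traffic with the generic rte_flow
API by matching the IANA-assigned UDP destination port 6635 (RFC 7510)
and an MPLS label packed into the 3-byte label_tc_s field, e.g.:

    #include <stdint.h>
    #include <rte_flow.h>
    #include <rte_byteorder.h>

    /* Sketch: steer MPLS-over-UDP packets with label 100 to one Rx queue. */
    static struct rte_flow *
    create_mplsoudp_flow(uint16_t port_id, uint16_t rx_queue,
                         struct rte_flow_error *error)
    {
        struct rte_flow_attr attr = { .ingress = 1 };
        /* Outer UDP destination port 6635 is MPLS-in-UDP (RFC 7510). */
        struct rte_flow_item_udp udp_spec = {
            .hdr = { .dst_port = rte_cpu_to_be_16(6635) },
        };
        struct rte_flow_item_udp udp_mask = {
            .hdr = { .dst_port = rte_cpu_to_be_16(0xffff) },
        };
        /* Label 100 = 0x00064, shifted left by 4 over TC/S bits. */
        struct rte_flow_item_mpls mpls_spec = {
            .label_tc_s = { 0x00, 0x06, 0x40 },
        };
        struct rte_flow_item_mpls mpls_mask = {
            .label_tc_s = { 0xff, 0xff, 0xf0 }, /* Label bits only. */
        };
        struct rte_flow_item pattern[] = {
            { .type = RTE_FLOW_ITEM_TYPE_ETH },
            { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
            {
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .spec = &udp_spec,
                .mask = &udp_mask,
            },
            {
                .type = RTE_FLOW_ITEM_TYPE_MPLS,
                .spec = &mpls_spec,
                .mask = &mpls_mask,
            },
            { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        struct rte_flow_action_queue queue = { .index = rx_queue };
        struct rte_flow_action actions[] = {
            { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
            { .type = RTE_FLOW_ACTION_TYPE_END },
        };

        return rte_flow_create(port_id, &attr, pattern, actions, error);
    }

MPLS over GRE is matched the same way by replacing the UDP item with a
GRE item, per the item graph extended below.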
Signed-off-by: Matan Azrad <ma...@mellanox.com>
---
 doc/guides/nics/mlx5.rst     |   4 +-
 drivers/net/mlx5/Makefile    |   5 ++
 drivers/net/mlx5/mlx5.c      |  13 ++++
 drivers/net/mlx5/mlx5.h      |   1 +
 drivers/net/mlx5/mlx5_flow.c | 154 +++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 170 insertions(+), 7 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index a7d5c90..2b110f4 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -74,7 +74,7 @@ Features
 - RX interrupts.
 - Statistics query including Basic, Extended and per queue.
 - Rx HW timestamp.
-- Tunnel types: VXLAN, L3 VXLAN, VXLAN-GPE, GRE.
+- Tunnel types: VXLAN, L3 VXLAN, VXLAN-GPE, GRE, MPLSoGRE, MPLSoUDP.
 - Tunnel HW offloads: packet type, inner/outer RSS, IP and UDP checksum verification.
 
 Limitations
@@ -113,6 +113,8 @@ Limitations
 
 - VXLAN TSO and checksum offloads are not supported on VM.
 
+- L3 VXLAN and VXLAN-GPE tunnels cannot be supported together with MPLSoGRE and MPLSoUDP.
+
 - VF: flow rules created on VF devices can only match traffic targeted at the
   configured MAC addresses (see ``rte_eth_dev_mac_addr_add()``).
 
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 8d64d4c..293144e 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -108,6 +108,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		enum MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS \
 		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
+		HAVE_IBV_DEVICE_MPLS_SUPPORT \
+		infiniband/verbs.h \
+		enum IBV_FLOW_SPEC_MPLS \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
 		HAVE_IBV_WQ_FLAG_RX_END_PADDING \
 		infiniband/verbs.h \
 		enum IBV_WQ_FLAG_RX_END_PADDING \
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 8aa91cc..225ebd4 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -690,6 +690,7 @@
 	unsigned int mps;
 	unsigned int cqe_comp;
 	unsigned int tunnel_en = 0;
+	unsigned int mpls_en = 0;
 	unsigned int swp = 0;
 	unsigned int verb_priorities = 0;
 	unsigned int mprq = 0;
@@ -850,6 +851,17 @@
 	DRV_LOG(WARNING,
 		"tunnel offloading disabled due to old OFED/rdma-core version");
 #endif
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+	mpls_en = ((attrs_out.tunnel_offloads_caps &
+		    MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_GRE) &&
+		   (attrs_out.tunnel_offloads_caps &
+		    MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_UDP));
+	DRV_LOG(DEBUG, "MPLS over GRE/UDP tunnel offloading is %ssupported",
+		mpls_en ? "" : "not ");
+#else
+	DRV_LOG(WARNING, "MPLS over GRE/UDP tunnel offloading disabled due to"
+		" old OFED/rdma-core version or firmware configuration");
+#endif
 	err = mlx5_glue->query_device_ex(attr_ctx, NULL, &device_attr);
 	if (err) {
 		DEBUG("ibv_query_device_ex() failed");
@@ -873,6 +885,7 @@
 			.cqe_comp = cqe_comp,
 			.mps = mps,
 			.tunnel_en = tunnel_en,
+			.mpls_en = mpls_en,
 			.tx_vec_en = 1,
 			.rx_vec_en = 1,
 			.mpw_hdr_dseg = 0,
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c4c962b..7750832 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -93,6 +93,7 @@ struct mlx5_dev_config {
 	unsigned int mps:2; /* Multi-packet send supported mode. */
 	unsigned int tunnel_en:1;
 	/* Whether tunnel stateless offloads are supported. */
+	unsigned int mpls_en:1; /* MPLS over GRE/UDP is enabled. */
 	unsigned int flow_counter_en:1; /* Whether flow counter is supported. */
 	unsigned int cqe_comp:1; /* CQE compression is enabled. */
 	unsigned int tso:1; /* Whether TSO is supported. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index ec6d00f..432fde0 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -101,6 +101,11 @@ struct mlx5_flow_data {
 		      const void *default_mask,
 		      struct mlx5_flow_data *data);
 
+static int
+mlx5_flow_create_mpls(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      struct mlx5_flow_data *data);
+
 struct mlx5_flow_parse;
 
 static void
@@ -248,12 +253,14 @@ struct rte_flow {
 #define IS_TUNNEL(type) ( \
 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
 	(type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
-	(type) == RTE_FLOW_ITEM_TYPE_GRE)
+	(type) == RTE_FLOW_ITEM_TYPE_GRE || \
+	(type) == RTE_FLOW_ITEM_TYPE_MPLS)
 
 const uint32_t flow_ptype[] = {
 	[RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
 	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
 	[RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
+	[RTE_FLOW_ITEM_TYPE_MPLS] = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
 };
 
 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
@@ -264,6 +271,10 @@ struct rte_flow {
 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)] =
 		RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
 	[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
+	[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)] =
+		RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
+	[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)] =
+		RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
 };
 
 /** Structure to generate a simple graph of layers supported by the NIC. */
@@ -400,7 +411,8 @@ struct mlx5_flow_items {
 	},
 	[RTE_FLOW_ITEM_TYPE_UDP] = {
 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
-			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
+			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
+			       RTE_FLOW_ITEM_TYPE_MPLS),
 		.actions = valid_actions,
 		.mask = &(const struct rte_flow_item_udp){
 			.hdr = {
@@ -429,7 +441,8 @@ struct mlx5_flow_items {
 	[RTE_FLOW_ITEM_TYPE_GRE] = {
 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
 			       RTE_FLOW_ITEM_TYPE_IPV4,
-			       RTE_FLOW_ITEM_TYPE_IPV6),
+			       RTE_FLOW_ITEM_TYPE_IPV6,
+			       RTE_FLOW_ITEM_TYPE_MPLS),
 		.actions = valid_actions,
 		.mask = &(const struct rte_flow_item_gre){
 			.protocol = -1,
@@ -437,7 +450,26 @@ struct mlx5_flow_items {
 		.default_mask = &rte_flow_item_gre_mask,
 		.mask_sz = sizeof(struct rte_flow_item_gre),
 		.convert = mlx5_flow_create_gre,
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+		.dst_sz = sizeof(struct ibv_flow_spec_gre),
+#else
 		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
+#endif
+	},
+	[RTE_FLOW_ITEM_TYPE_MPLS] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
+			       RTE_FLOW_ITEM_TYPE_IPV4,
+			       RTE_FLOW_ITEM_TYPE_IPV6),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_mpls){
+			.label_tc_s = "\xff\xff\xf0",
+		},
+		.default_mask = &rte_flow_item_mpls_mask,
+		.mask_sz = sizeof(struct rte_flow_item_mpls),
+		.convert = mlx5_flow_create_mpls,
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+		.dst_sz = sizeof(struct ibv_flow_spec_mpls),
+#endif
 	},
 	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
@@ -896,7 +928,9 @@ struct ibv_spec_header {
 		if (ret)
 			goto exit_item_not_supported;
 		if (IS_TUNNEL(items->type)) {
-			if (parser->tunnel) {
+			if (parser->tunnel &&
+			    !((items - 1)->type == RTE_FLOW_ITEM_TYPE_GRE &&
+			      items->type == RTE_FLOW_ITEM_TYPE_MPLS)) {
 				rte_flow_error_set(error, ENOTSUP,
 						   RTE_FLOW_ERROR_TYPE_ITEM,
 						   items,
@@ -904,6 +938,16 @@ struct ibv_spec_header {
 						   " tunnel encapsulations.");
 				return -rte_errno;
 			}
+			if (items->type == RTE_FLOW_ITEM_TYPE_MPLS &&
+			    !priv->config.mpls_en) {
+				rte_flow_error_set(error, ENOTSUP,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   items,
+						   "MPLS not supported or"
+						   " disabled in firmware"
+						   " configuration.");
+				return -rte_errno;
+			}
 			if (!priv->config.tunnel_en &&
 			    parser->rss_conf.level > 1) {
 				rte_flow_error_set(error, ENOTSUP,
@@ -1878,16 +1922,27 @@ struct ibv_spec_header {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
-		     const void *default_mask __rte_unused,
+mlx5_flow_create_gre(const struct rte_flow_item *item,
+		     const void *default_mask,
 		     struct mlx5_flow_data *data)
 {
 	struct mlx5_flow_parse *parser = data->parser;
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
+	(void)default_mask;
 	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
 	struct ibv_flow_spec_tunnel tunnel = {
 		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
 		.size = size,
 	};
+#else
+	const struct rte_flow_item_gre *spec = item->spec;
+	const struct rte_flow_item_gre *mask = item->mask;
+	unsigned int size = sizeof(struct ibv_flow_spec_gre);
+	struct ibv_flow_spec_gre tunnel = {
+		.type = parser->inner | IBV_FLOW_SPEC_GRE,
+		.size = size,
+	};
+#endif
 	struct ibv_flow_spec_ipv4_ext *ipv4;
 	struct ibv_flow_spec_ipv6 *ipv6;
 	unsigned int i;
@@ -1899,6 +1954,20 @@ struct ibv_spec_header {
 	/* Default GRE to inner RSS. */
 	if (!parser->rss_conf.level)
 		parser->rss_conf.level = 2;
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+	if (spec) {
+		if (!mask)
+			mask = default_mask;
+		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
+		tunnel.val.protocol = spec->protocol;
+		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
+		tunnel.mask.protocol = mask->protocol;
+		/* Remove unwanted bits from values. */
+		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
+		tunnel.val.protocol &= tunnel.mask.protocol;
+		tunnel.val.key &= tunnel.mask.key;
+	}
+#endif
 	/* Update encapsulation IP layer protocol. */
 	for (i = 0; i != hash_rxq_init_n; ++i) {
 		if (!parser->queue[i].ibv_attr)
@@ -1932,6 +2001,79 @@ struct ibv_spec_header {
 }
 
 /**
+ * Convert MPLS item to Verbs specification.
+ * MPLS tunnel types currently supported are MPLS-in-GRE and MPLS-in-UDP.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param default_mask[in]
+ *   Default bit-masks to use when item->mask is not provided.
+ * @param data[in, out]
+ *   User structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_create_mpls(const struct rte_flow_item *item,
+		      const void *default_mask,
+		      struct mlx5_flow_data *data)
+{
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
+	(void)default_mask;
+	return rte_flow_error_set(data->error, ENOTSUP,
+				  RTE_FLOW_ERROR_TYPE_ITEM,
+				  item,
+				  "MPLS is not supported by driver");
+#else
+	const struct rte_flow_item_mpls *spec = item->spec;
+	const struct rte_flow_item_mpls *mask = item->mask;
+	struct mlx5_flow_parse *parser = data->parser;
+	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
+	struct ibv_flow_spec_mpls mpls = {
+		.type = IBV_FLOW_SPEC_MPLS,
+		.size = size,
+	};
+
+	parser->inner = IBV_FLOW_SPEC_INNER;
+	if (parser->layer == HASH_RXQ_UDPV4 ||
+	    parser->layer == HASH_RXQ_UDPV6) {
+		parser->tunnel =
+			ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)];
+		parser->out_layer = parser->layer;
+	} else {
+		parser->tunnel =
+			ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)];
+		/* parser->out_layer stays as in GRE out_layer. */
+	}
+	parser->layer = HASH_RXQ_TUNNEL;
+	/*
+	 * For MPLS-in-GRE, RSS level should have been set.
+	 * For MPLS-in-UDP, use outer RSS.
+	 */
+	if (!parser->rss_conf.level)
+		parser->rss_conf.level = 1;
+	if (spec) {
+		if (!mask)
+			mask = default_mask;
+		/*
+		 * The verbs label field includes the entire MPLS header:
+		 * bits 0:19 - label value field.
+		 * bits 20:22 - traffic class field.
+		 * bits 23 - bottom of stack bit.
+		 * bits 24:31 - ttl field.
+		 */
+		mpls.val.label = *(const uint32_t *)spec;
+		mpls.mask.label = *(const uint32_t *)mask;
+		/* Remove unwanted bits from values. */
+		mpls.val.label &= mpls.mask.label;
+	}
+	mlx5_flow_create_copy(parser, &mpls, size);
+	return 0;
+#endif
+}
+
+/**
  * Convert mark/flag action to Verbs specification.
  *
  * @param parser
-- 
1.9.5