The current Linux kernel git tree already includes a VXLAN-GPE implementation: author Jiri Benc <jb...@redhat.com>, committer David S. Miller <da...@davemloft.net>, commit e1e5314de08ba6003b358125eafc9ad9e75a950c (patch), tree 1e18cdabf1c9d9ef17e26c6480e629465447f77f /drivers/net/vxlan.c, parent a6d5bbf34efa8330af7b0b1dba0f38148516ed97 (diff), subject "vxlan: implement GPE".
This patch ports the upstream Linux kernel VXLAN-GPE implementation to OVS so that people can also use VXLAN-GPE even if they do not replace their kernel with the latest Linux kernel. Signed-off-by: Johnson Li <johnson...@intel.com> Signed-off-by: Yi Yang <yi.y.y...@intel.com> --- datapath/linux/compat/include/linux/if_link.h | 2 + datapath/linux/compat/include/linux/openvswitch.h | 1 + datapath/linux/compat/include/net/vxlan.h | 72 ++++++++++ datapath/linux/compat/vxlan.c | 160 +++++++++++++++++++--- datapath/vport-netdev.c | 3 +- datapath/vport-vxlan.c | 17 ++- lib/netdev-vport.c | 4 +- 7 files changed, 236 insertions(+), 23 deletions(-) diff --git a/datapath/linux/compat/include/linux/if_link.h b/datapath/linux/compat/include/linux/if_link.h index 6209dcb..4fe5add 100644 --- a/datapath/linux/compat/include/linux/if_link.h +++ b/datapath/linux/compat/include/linux/if_link.h @@ -100,6 +100,8 @@ enum { IFLA_VXLAN_REMCSUM_NOPARTIAL, #define IFLA_VXLAN_COLLECT_METADATA rpl_IFLA_VXLAN_COLLECT_METADATA IFLA_VXLAN_COLLECT_METADATA, +#define IFLA_VXLAN_GPE rpl_IFLA_VXLAN_GPE + IFLA_VXLAN_GPE, #define __IFLA_VXLAN_MAX rpl___IFLA_VXLAN_MAX __IFLA_VXLAN_MAX }; diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h index 3b39ebb..bd37594 100644 --- a/datapath/linux/compat/include/linux/openvswitch.h +++ b/datapath/linux/compat/include/linux/openvswitch.h @@ -287,6 +287,7 @@ enum ovs_vport_attr { enum { OVS_VXLAN_EXT_UNSPEC, OVS_VXLAN_EXT_GBP, /* Flag or __u32 */ + OVS_VXLAN_EXT_GPE, /* Flag, Generic Protocol Extension */ __OVS_VXLAN_EXT_MAX, }; diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h index 75a5a7a..25b3e24 100644 --- a/datapath/linux/compat/include/net/vxlan.h +++ b/datapath/linux/compat/include/net/vxlan.h @@ -84,6 +84,66 @@ struct vxlanhdr_gbp { #define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) #define VXLAN_GBP_ID_MASK (0xFFFF) +/* + * VXLAN Generic Protocol Extension (VXLAN_F_GPE): + * 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |R|R|Ver|I|P|R|O| Reserved |Next Protocol | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Ver = Version. Indicates VXLAN GPE protocol version. + * + * P = Next Protocol Bit. The P bit is set to indicate that the + * Next Protocol field is present. + * + * O = OAM Flag Bit. The O bit is set to indicate that the packet + * is an OAM packet. + * + * Next Protocol = This 8 bit field indicates the protocol header + * immediately following the VXLAN GPE header. + * + * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01 + */ + +struct vxlanhdr_gpe { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 oam_flag:1, + reserved_flags1:1, + np_applied:1, + instance_applied:1, + version:2, +reserved_flags2:2; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 reserved_flags2:2, + version:2, + instance_applied:1, + np_applied:1, + reserved_flags1:1, + oam_flag:1; +#endif + u8 reserved_flags3; + u8 reserved_flags4; + u8 next_protocol; + __be32 vx_vni; +}; + +/* VXLAN-GPE header flags. */ +#define VXLAN_HF_VER (BIT(29) | BIT(28)) +#define VXLAN_HF_NP (BIT(26)) +#define VXLAN_HF_OAM (BIT(24)) +#define VXLAN_HF_GPE (BIT(26)) + +#define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \ + (0xFF)) + +/* VXLAN-GPE header Next Protocol. 
*/ +#define VXLAN_GPE_NP_IPV4 0x01 +#define VXLAN_GPE_NP_IPV6 0x02 +#define VXLAN_GPE_NP_ETHERNET 0x03 +#define VXLAN_GPE_NP_NSH 0x04 +#define ETH_P_NSH 0x894f + /* VXLAN protocol header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |G|R|R|R|I|R|R|C| Reserved | @@ -120,6 +180,7 @@ struct vxlanhdr { struct vxlan_metadata { __be32 vni; u32 gbp; + u32 gpe; }; #define VNI_HASH_BITS 10 @@ -205,15 +266,26 @@ struct vxlan_dev { #define VXLAN_F_GBP 0x800 #define VXLAN_F_REMCSUM_NOPARTIAL 0x1000 #define VXLAN_F_COLLECT_METADATA 0x2000 +#define VXLAN_F_GPE 0x4000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable */ #define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \ + VXLAN_F_GPE | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_REMCSUM_RX | \ VXLAN_F_REMCSUM_NOPARTIAL | \ VXLAN_F_COLLECT_METADATA) + +/* Flags that can be set together with VXLAN_F_GPE. */ +#define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ + VXLAN_F_IPV6 | \ + VXLAN_F_UDP_ZERO_CSUM | \ + VXLAN_F_UDP_ZERO_CSUM6_TX | \ + VXLAN_F_UDP_ZERO_CSUM6_RX | \ + VXLAN_F_COLLECT_METADATA) + #define vxlan_dev_create rpl_vxlan_dev_create struct net_device *rpl_vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c index 4faa18f..6f25dff 100644 --- a/datapath/linux/compat/vxlan.c +++ b/datapath/linux/compat/vxlan.c @@ -812,6 +812,45 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, } #endif +static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed, + __be32 *protocol, + struct sk_buff *skb, u32 vxflags) +{ + struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed; + + /* Need to have Next Protocol set for interfaces in GPE mode. */ + if (!gpe->np_applied) + return false; + /* "The initial version is 0. If a receiver does not support the + * version indicated it MUST drop the packet. 
+ */ + if (gpe->version != 0) + return false; + /* "When the O bit is set to 1, the packet is an OAM packet and OAM + * processing MUST occur." However, we don't implement OAM + * processing, thus drop the packet. + */ + if (gpe->oam_flag) + return false; + + switch (gpe->next_protocol) { + case VXLAN_GPE_NP_IPV4: + *protocol = htons(ETH_P_IP); + break; + case VXLAN_GPE_NP_IPV6: + *protocol = htons(ETH_P_IPV6); + break; + case VXLAN_GPE_NP_ETHERNET: + *protocol = htons(ETH_P_TEB); + break; + default: + return false; + } + + unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS; + return true; +} + static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, struct vxlan_metadata *md, u32 vni, struct metadata_dst *tun_dst) @@ -822,6 +861,9 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, struct pcpu_sw_netstats *stats; union vxlan_addr saddr; int err = 0; + struct vxlanhdr unparsed; + __be32 protocol = htons(ETH_P_TEB); + bool raw_proto = false; /* For flow based devices, map all packets to VNI 0 */ if (vs->flags & VXLAN_F_COLLECT_METADATA) @@ -832,14 +874,35 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, if (!vxlan) goto drop; - skb_reset_mac_header(skb); - skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev))); - skb->protocol = eth_type_trans(skb, vxlan->dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - - /* Ignore packet loops (and multicast echo) */ - if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr)) - goto drop; + /* For backwards compatibility, only allow reserved fields to be + * used by VXLAN extensions if explicitly requested. 
+ */ + if (vs->flags & VXLAN_F_GPE) { + unparsed = *(struct vxlanhdr *)(udp_hdr(skb) + 1); + if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags)) + goto drop; + if (protocol != htons(ETH_P_TEB)) { + raw_proto = true; + } + } + + if (!raw_proto) { + skb_reset_mac_header(skb); + skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev))); + skb->protocol = eth_type_trans(skb, vxlan->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + + /* Ignore packet loops (and multicast echo) */ + if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr)) + goto drop; + + if ((vxlan->flags & VXLAN_F_LEARN) && + vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source)) + goto drop; + } else { + skb->dev = vxlan->dev; + skb->pkt_type = PACKET_HOST; + } /* Get data from the outer IP header */ if (vxlan_get_sk_family(vs) == AF_INET) { @@ -861,10 +924,6 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, goto drop; } - if ((vxlan->flags & VXLAN_F_LEARN) && - vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source)) - goto drop; - skb_reset_network_header(skb); /* In flow-based mode, GBP is carried in dst_metadata */ if (!(vs->flags & VXLAN_F_COLLECT_METADATA)) @@ -908,6 +967,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) struct metadata_dst dst; char buf[sizeof(struct metadata_dst) + sizeof(*md)]; } buf; + struct vxlanhdr unparsed; + __be32 protocol = htons(ETH_P_TEB); /* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) @@ -924,14 +985,23 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto bad_flags; } - if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) - goto drop; - vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); - vs = rcu_dereference_sk_user_data(sk); if (!vs) goto drop; + /* For backwards compatibility, only allow reserved fields to be + * used by VXLAN extensions if explicitly requested. 
+ */ + if (vs->flags & VXLAN_F_GPE) { + unparsed = *(struct vxlanhdr *)(udp_hdr(skb) + 1); + if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags)) + goto drop; + } + + if (iptunnel_pull_header(skb, VXLAN_HLEN, protocol)) + goto drop; + vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); + #ifdef HAVE_VXLAN_HF_RCO if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni, @@ -971,7 +1041,16 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) md->gbp |= VXLAN_GBP_POLICY_APPLIED; flags &= ~VXLAN_GBP_USED_BITS; - } + } else if ((flags & VXLAN_HF_GPE) && (vs->flags & VXLAN_F_GPE)) { + struct vxlanhdr_gpe *gpe; + + gpe = (struct vxlanhdr_gpe *)vxh; + md->gpe = ntohs(gpe->next_protocol); + + buf.dst.u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT; + + flags &= ~VXLAN_GPE_USED_BITS; + } if (flags || vni & ~VXLAN_VNI_MASK) { /* If there are any unprocessed flags remaining treat @@ -1023,6 +1102,33 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); } +static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags, + __be16 protocol) +{ + struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh; + + vxh->vx_flags |= htonl(VXLAN_HF_GPE); + gpe->np_applied = 1; + gpe->version = 0; + gpe->oam_flag = 0; + + switch (protocol) { + case htons(ETH_P_IP): + gpe->next_protocol = VXLAN_GPE_NP_IPV4; + return 0; + case htons(ETH_P_IPV6): + gpe->next_protocol = VXLAN_GPE_NP_IPV6; + return 0; + case htons(ETH_P_TEB): + gpe->next_protocol = VXLAN_GPE_NP_ETHERNET; + return 0; + case htons(ETH_P_NSH): + gpe->next_protocol = VXLAN_GPE_NP_NSH; + return 0; + } + return -EPFNOSUPPORT; +} + #if IS_ENABLED(CONFIG_IPV6) static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, @@ -1036,6 +1142,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, int err; bool udp_sum = !(vxflags & 
VXLAN_F_UDP_ZERO_CSUM6_TX); int type = 0; + __be16 inner_protocol = htons(ETH_P_TEB); if ((vxflags & VXLAN_F_REMCSUM_TX) && skb->ip_summed == CHECKSUM_PARTIAL) { @@ -1106,8 +1213,14 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, if (vxflags & VXLAN_F_GBP) vxlan_build_gbp_hdr(vxh, vxflags, md); + if (vxflags & VXLAN_F_GPE) { + err = vxlan_build_gpe_hdr(vxh, vxflags, htons(ETH_P_TEB)); + if (err < 0) + goto err; + inner_protocol = htons(ETH_P_TEB); + } - ovs_skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + ovs_skb_set_inner_protocol(skb, inner_protocol); udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio, ttl, src_port, dst_port, @@ -1129,6 +1242,7 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk int err; bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM); int type = 0; + __be16 inner_protocol = htons(ETH_P_TEB); if ((vxflags & VXLAN_F_REMCSUM_TX) && skb->ip_summed == CHECKSUM_PARTIAL) { @@ -1191,8 +1305,14 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk } if (vxflags & VXLAN_F_GBP) vxlan_build_gbp_hdr(vxh, vxflags, md); - - ovs_skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + if (vxflags & VXLAN_F_GPE) { + err = vxlan_build_gpe_hdr(vxh, vxflags, htons(ETH_P_TEB)); + if (err < 0) + return err; + inner_protocol = htons(ETH_P_TEB); + } + + ovs_skb_set_inner_protocol(skb, inner_protocol); return udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, ttl, df, src_port, dst_port, xnet, diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c index ddd3f5c..08c09f4 100644 --- a/datapath/vport-netdev.c +++ b/datapath/vport-netdev.c @@ -102,7 +102,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) } if (vport->dev->flags & IFF_LOOPBACK || - vport->dev->type != ARPHRD_ETHER || + (vport->dev->type != ARPHRD_ETHER && + vport->dev->type != ARPHRD_NONE) || ovs_is_internal_dev(vport->dev)) { err = -EINVAL; goto error_put; diff --git a/datapath/vport-vxlan.c 
b/datapath/vport-vxlan.c index c05f5d4..c59dbd6 100644 --- a/datapath/vport-vxlan.c +++ b/datapath/vport-vxlan.c @@ -52,13 +52,26 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) return -EMSGSIZE; nla_nest_end(skb, exts); - } + } else if (vxlan->flags & VXLAN_F_GPE) { + struct nlattr *exts; + + exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION); + if (!exts) + return -EMSGSIZE; + + if (vxlan->flags & VXLAN_F_GPE && + nla_put_flag(skb, OVS_VXLAN_EXT_GPE)) + return -EMSGSIZE; + + nla_nest_end(skb, exts); + } return 0; } static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX + 1] = { [OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, }, + [OVS_VXLAN_EXT_GPE] = { .type = NLA_FLAG, }, }; static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr, @@ -76,6 +89,8 @@ static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr, if (exts[OVS_VXLAN_EXT_GBP]) conf->flags |= VXLAN_F_GBP; + else if (exts[OVS_VXLAN_EXT_GPE]) + conf->flags |= VXLAN_F_GPE; return 0; } diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 83a795c..4819a00 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -535,7 +535,9 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args) while (ext) { if (!strcmp(type, "vxlan") && !strcmp(ext, "gbp")) { tnl_cfg.exts |= (1 << OVS_VXLAN_EXT_GBP); - } else { + } else if (!strcmp(type, "vxlan") && !strcmp(ext, "gpe")) { + tnl_cfg.exts |= (1 << OVS_VXLAN_EXT_GPE); + } else { VLOG_WARN("%s: unknown extension '%s'", name, ext); } -- 1.9.3 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev