Support the Generic Protocol Extension to VXLAN (VXLAN-GPE), which extends
VXLAN to allow multi-protocol encapsulation. IPv4, IPv6, MPLS unicast and
NSH encapsulated packets can be sent and received in addition to Ethernet
frames, as defined in:

https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01

Signed-off-by: Brian Russell <bruss...@brocade.com>
---
 drivers/net/vxlan.c          | 139 +++++++++++++++++++++++++++++++++++++++----
 include/net/vxlan.h          |  40 ++++++++++++-
 include/uapi/linux/if_link.h |   1 +
 3 files changed, 166 insertions(+), 14 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index ebf57d9..e6a6bfb 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -50,6 +50,7 @@
 #include <net/ip6_checksum.h>
 #endif
 #include <net/dst_metadata.h>
+#include <net/nsh.h>
 
 #define VXLAN_VERSION  "0.1"
 
@@ -1168,14 +1169,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
        if (!vxlan)
                goto drop;
 
-       skb_reset_mac_header(skb);
        skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
-       skb->protocol = eth_type_trans(skb, vxlan->dev);
-       skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
-
-       /* Ignore packet loops (and multicast echo) */
-       if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
-               goto drop;
 
        /* Get data from the outer IP header */
        if (vxlan_get_sk_family(vs) == AF_INET) {
@@ -1195,13 +1189,57 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
                tun_dst = NULL;
        }
 
+       switch (md->gpe_np) {
+       case VXLAN_GPE_NP_IPv4:
+               skb->protocol = htons(ETH_P_IP);
+               goto skip_l2;
+#if IS_ENABLED(CONFIG_IPV6)
+       case VXLAN_GPE_NP_IPv6:
+               skb->protocol = htons(ETH_P_IPV6);
+               goto skip_l2;
+#endif
+#if IS_ENABLED(CONFIG_MPLS)
+       case VXLAN_GPE_NP_MPLS:
+               skb->protocol = htons(ETH_P_MPLS_UC);
+               goto skip_l2;
+#endif
+#if IS_ENABLED(CONFIG_NET_NSH)
+       case VXLAN_GPE_NP_NSH:
+               {
+                       u8 next_proto;
+
+                       if (nsh_decap(skb, NULL, NULL, &next_proto) < 0)
+                               goto drop;
+
+                       if (next_proto != NSH_NEXT_PROTO_ETH)
+                               goto skip_l2;
+               }
+               break;
+#endif
+       case VXLAN_GPE_NP_ETH:
+               /* GPE with next proto eth is equivalent to vanilla vxlan. */
+       default:
+               break;
+       }
+
+       skb_reset_mac_header(skb);
+       skb->protocol = eth_type_trans(skb, vxlan->dev);
+       skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+
+       /* Ignore packet loops (and multicast echo) */
+       if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
+               goto drop;
+
        if ((vxlan->flags & VXLAN_F_LEARN) &&
            vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
                goto drop;
 
+skip_l2:
        skb_reset_network_header(skb);
+
        /* In flow-based mode, GBP is carried in dst_metadata */
-       if (!(vs->flags & VXLAN_F_COLLECT_METADATA))
+       if (!(vs->flags & VXLAN_F_COLLECT_METADATA) &&
+           !(vs->flags & VXLAN_F_GPE))
                skb->mark = md->gbp;
 
        if (oip6)
@@ -1252,6 +1290,10 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
        struct vxlan_metadata _md;
        struct vxlan_metadata *md = &_md;
 
+       vs = rcu_dereference_sk_user_data(sk);
+       if (!vs)
+               goto drop;
+
        /* Need Vxlan and inner Ethernet header to be present */
        if (!pskb_may_pull(skb, VXLAN_HLEN))
                goto error;
@@ -1267,14 +1309,13 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
                goto bad_flags;
        }
 
-       if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
+       /* If GPE, protocol will be set once next proto examined. */
+       if (iptunnel_pull_header(skb, VXLAN_HLEN,
+                                vs->flags & VXLAN_F_GPE ?
+                                htons(ETH_P_IP) : htons(ETH_P_TEB)))
                goto drop;
        vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
 
-       vs = rcu_dereference_sk_user_data(sk);
-       if (!vs)
-               goto drop;
-
        if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
                vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni,
                                    !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL));
@@ -1318,6 +1359,16 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
                flags &= ~VXLAN_GBP_USED_BITS;
        }
 
+       if (vs->flags & VXLAN_F_GPE) {
+               /* Next protocol is required */
+               if (!(flags & VXLAN_HF_GPE_NP))
+                       goto bad_flags;
+
+               md->gpe_np = flags & VXLAN_GPE_NP_MASK;
+
+               flags &= ~VXLAN_GPE_USED_BITS;
+       }
+
        if (flags || vni & ~VXLAN_VNI_MASK) {
                /* If there are any unprocessed flags remaining treat
                 * this as a malformed packet. This behavior diverges from
@@ -1664,6 +1715,37 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
        return false;
 }
 
+/* Set the GPE Next Protocol fields of an outgoing VXLAN header.
+ *
+ * @vxh:   header being built; bits are OR-ed into vx_flags
+ * @proto: ethertype of the payload, as found in skb->protocol
+ *
+ * NSH, IPv6 and MPLS are only recognised when built into the kernel.
+ */
+static void vxlan_build_gpe_hdr(struct vxlanhdr *vxh, __be16 proto)
+{
+       /* Ethertypes without a case below are carried as Ethernet. */
+       u32 np = VXLAN_GPE_NP_ETH;
+
+       if (proto == htons(ETH_P_IP))
+               np = VXLAN_GPE_NP_IPv4;
+#if IS_ENABLED(CONFIG_NET_NSH)
+       else if (proto == htons(ETH_P_NSH))
+               np = VXLAN_GPE_NP_NSH;
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+       else if (proto == htons(ETH_P_IPV6))
+               np = VXLAN_GPE_NP_IPv6;
+#endif
+#if IS_ENABLED(CONFIG_MPLS)
+       else if (proto == htons(ETH_P_MPLS_UC))
+               np = VXLAN_GPE_NP_MPLS;
+#endif
+
+       /* P flag plus the protocol code, converted to wire order. */
+       vxh->vx_flags |= htonl(VXLAN_HF_GPE_NP | np);
+}
+
 static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
                                struct vxlan_metadata *md)
 {
@@ -1750,6 +1832,9 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
        if (vxflags & VXLAN_F_GBP)
                vxlan_build_gbp_hdr(vxh, vxflags, md);
 
+       if (vxflags & VXLAN_F_GPE)
+               vxlan_build_gpe_hdr(vxh, skb->protocol);
+
        skb_set_inner_protocol(skb, htons(ETH_P_TEB));
        return 0;
 }
@@ -2073,6 +2158,26 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
        struct vxlan_rdst *rdst, *fdst = NULL;
        struct vxlan_fdb *f;
 
+       if (vxlan->flags & VXLAN_F_GPE) {
+               switch (skb->protocol) {
+#if IS_ENABLED(CONFIG_NET_NSH)
+               case htons(ETH_P_NSH):
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+               case htons(ETH_P_IPV6):
+#endif
+#if IS_ENABLED(CONFIG_MPLS)
+               case htons(ETH_P_MPLS_UC):
+#endif
+               case htons(ETH_P_IP):
+                       vxlan_xmit_one(skb, dev, &vxlan->default_dst, false);
+                       return NETDEV_TX_OK;
+               default:
+                       /* Assume L2 and look for FDB entry */
+                       break;
+               }
+       }
+
        info = skb_tunnel_info(skb);
 
        skb_reset_mac_header(skb);
@@ -2474,6 +2579,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
        [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
        [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
        [IFLA_VXLAN_GBP]        = { .type = NLA_FLAG, },
+       [IFLA_VXLAN_GPE]        = { .type = NLA_FLAG, },
        [IFLA_VXLAN_REMCSUM_NOPARTIAL]  = { .type = NLA_FLAG },
 };
 
@@ -2892,6 +2998,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
        if (data[IFLA_VXLAN_GBP])
                conf.flags |= VXLAN_F_GBP;
 
+       if (data[IFLA_VXLAN_GPE])
+               conf.flags |= VXLAN_F_GPE;
+
        if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
                conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL;
 
@@ -3033,6 +3142,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
            nla_put_flag(skb, IFLA_VXLAN_GBP))
                goto nla_put_failure;
 
+       if (vxlan->flags & VXLAN_F_GPE &&
+           nla_put_flag(skb, IFLA_VXLAN_GPE))
+               goto nla_put_failure;
+
        if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL &&
            nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
                goto nla_put_failure;
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 25bd919..7886296 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -121,8 +121,44 @@ struct vxlanhdr_gbp {
 
 struct vxlan_metadata {
        u32             gbp;
+       u8              gpe_np; /* GPE Next Protocol code of a received header */
 };
 
+/*
+ * VXLAN Generic Protocol Extension:
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|Ver|I|P|R|O|       Reserved                |Next Protocol  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                VXLAN Network Identifier (VNI) |   Reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Ver            Version, initially 0
+ * I = 1         VXLAN Network Identifier (VNI) present
+ * P = 1          Next Protocol field is present
+ * O = 1          OAM
+ * Next Protocol  Indicates the protocol header immediately following
+ *                the VXLAN GPE header.
+ *
+ * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01
+ *
+ * Use struct vxlanhdr above with some extra defines:
+ */
+#define VXLAN_HF_GPE_OAM BIT(24) /* O: OAM bit, last bit of flags octet */
+#define VXLAN_HF_GPE_NP  BIT(26) /* P: Next Protocol field is present */
+
+#define VXLAN_GPE_NP_MASK (0xFF) /* Next Protocol: low octet of word 0 */
+
+#define VXLAN_GPE_NP_IPv4 0x1
+#define VXLAN_GPE_NP_IPv6 0x2
+#define VXLAN_GPE_NP_ETH  0x3
+#define VXLAN_GPE_NP_NSH  0x4
+#define VXLAN_GPE_NP_MPLS 0x5
+
+#define VXLAN_GPE_USED_BITS (VXLAN_HF_GPE_NP  | \
+                            VXLAN_HF_GPE_OAM | \
+                            VXLAN_GPE_NP_MASK)
+
+
 /* per UDP socket information */
 struct vxlan_sock {
        struct hlist_node hlist;
@@ -204,6 +240,7 @@ struct vxlan_dev {
 #define VXLAN_F_GBP                    0x800
 #define VXLAN_F_REMCSUM_NOPARTIAL      0x1000
 #define VXLAN_F_COLLECT_METADATA       0x2000
+#define VXLAN_F_GPE                    0x4000
 
 /* Flags that are used in the receive path. These flags must match in
  * order for a socket to be shareable
@@ -212,7 +249,8 @@ struct vxlan_dev {
                                         VXLAN_F_UDP_ZERO_CSUM6_RX |    \
                                         VXLAN_F_REMCSUM_RX |           \
                                         VXLAN_F_REMCSUM_NOPARTIAL |    \
-                                        VXLAN_F_COLLECT_METADATA)
+                                        VXLAN_F_COLLECT_METADATA |     \
+                                        VXLAN_F_GPE)
 
 struct net_device *vxlan_dev_create(struct net *net, const char *name,
                                    u8 name_assign_type, struct vxlan_config *conf);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index d452cea..e8d74a5 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -456,6 +456,7 @@ enum {
        IFLA_VXLAN_GBP,
        IFLA_VXLAN_REMCSUM_NOPARTIAL,
        IFLA_VXLAN_COLLECT_METADATA,
+       IFLA_VXLAN_GPE,
        __IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
-- 
2.1.4

Reply via email to