This patch adds support for Network Service Headers (NSH) over VXLAN,
as described in [1]. The datapath is changed to insert an NSH header
on transmit, and to parse it on receive, for any VXLAN port whose
destination port is 6633 (the IANA-allocated port for NSH over VXLAN).

[1] http://tools.ietf.org/html/draft-quinn-sfc-nsh-02

Signed-off-by: Pritesh Kothari <pritesh.koth...@cisco.com>

 create mode 100644 datapath/linux/compat/include/net/nsh.h

diff --git a/datapath/datapath.c b/datapath/datapath.c
index f7c3391..b96ad1e 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -361,6 +361,7 @@ static size_t key_attr_size(void)
                  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
                  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
                  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
+                 + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_NSP */
                + nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
                + nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
                + nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
diff --git a/datapath/flow.h b/datapath/flow.h
index 270a324..6a342be 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -41,9 +41,13 @@ struct sk_buff;
 #define OVS_TUNNEL_KEY_SIZE                                    \
         (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) +      \
          FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl))
+/* Used for masking nsp and nsi values in field nsp below */
+#define NSH_M_NSP      0xFFFFFF00
+#define NSH_M_NSI      0x000000FF
 
 struct ovs_key_ipv4_tunnel {
        __be64 tun_id;
+       __be32 nsp;      /* it contains (nsp - 24 bits | nsi - 8 bits) here */
        __be32 ipv4_src;
        __be32 ipv4_dst;
        __be16 tun_flags;
@@ -53,9 +57,10 @@ struct ovs_key_ipv4_tunnel {
 
 static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
                                         const struct iphdr *iph, __be64 tun_id,
-                                        __be16 tun_flags)
+                                        __be32 nsp, __be16 tun_flags)
 {
        tun_key->tun_id = tun_id;
+       tun_key->nsp = nsp;
        tun_key->ipv4_src = iph->saddr;
        tun_key->ipv4_dst = iph->daddr;
        tun_key->ipv4_tos = iph->tos;
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 40751cb..a2bc1e9 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -333,6 +333,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
        int rem;
        bool ttl = false;
        __be16 tun_flags = 0;
+       __be32 nsp = 0;
 
        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);
@@ -344,6 +345,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
                        [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
                        [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
                        [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+                       [OVS_TUNNEL_KEY_ATTR_NSP] = sizeof(u32),
                };
 
                if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
@@ -388,11 +390,16 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
                case OVS_TUNNEL_KEY_ATTR_CSUM:
                        tun_flags |= TUNNEL_CSUM;
                        break;
+               case OVS_TUNNEL_KEY_ATTR_NSP:
+                       nsp = htonl(be32_to_cpu(nla_get_be32(a)) << 8);
+                       tun_flags |= TUNNEL_NSP;
+                       break;
                default:
                        return -EINVAL;
                }
        }
 
+       SW_FLOW_KEY_PUT(match, tun_key.nsp, nsp, is_mask);
        SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
 
        if (rem > 0) {
@@ -420,6 +427,7 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
                              const struct ovs_key_ipv4_tunnel *output)
 {
        struct nlattr *nla;
+       __be32 nsp = cpu_to_be32(ntohl(output->nsp) >> 8);
 
        nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
        if (!nla)
@@ -445,6 +453,9 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
        if ((output->tun_flags & TUNNEL_CSUM) &&
                nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
                return -EMSGSIZE;
+       if (output->tun_flags & TUNNEL_NSP &&
+           nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_NSP, nsp))
+               return -EMSGSIZE;
 
        nla_nest_end(skb, nla);
        return 0;
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index cedb8c9..94910b9 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -71,4 +71,5 @@ openvswitch_headers += \
        linux/compat/include/net/netlink.h \
        linux/compat/include/net/sock.h \
        linux/compat/include/net/vxlan.h \
+       linux/compat/include/net/nsh.h \
        linux/compat/include/net/sctp/checksum.h
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
index a786aa9..a4aec4d 100644
--- a/datapath/linux/compat/include/net/ip_tunnels.h
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -20,6 +20,7 @@
 #define TUNNEL_VERSION __cpu_to_be16(0x40)
 #define TUNNEL_NO_KEY  __cpu_to_be16(0x80)
 #define TUNNEL_DONT_FRAGMENT   __cpu_to_be16(0x0100)
+#define TUNNEL_NSP     __cpu_to_be16(0x0200)
 
 struct tnl_ptk_info {
        __be16 flags;
diff --git a/datapath/linux/compat/include/net/nsh.h b/datapath/linux/compat/include/net/nsh.h
new file mode 100644
index 0000000..bdc81b4
--- /dev/null
+++ b/datapath/linux/compat/include/net/nsh.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2013, 2014 Cisco Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef NSH_H
+#define NSH_H 1
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+
+/**
+ * struct nsh_base - Network Service Base Header.
+ * @o: Operations and Management Packet indicator bit
+ * @c: If this bit is set then one or more contexts are in use.
+ * @proto: IEEE Ethertypes to indicate the frame within.
+ * @svc_idx: TTL functionality and location within service path.
+ * @svc_path: To uniquely identify service path.
+ */
+struct nsh_base {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u8    res:6,
+               c:1,
+               o:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       __u8    o:1,
+               c:1,
+               res:6;
+#else
+#error "Bitfield Endianess not defined."
+#endif
+       __u8    res1;
+       __be16  proto;
+       union {
+               struct {
+                       __u8    svc_path[3];
+                       __u8    svc_idx;
+               };
+               __be32 b2;
+       };
+};
+
+/**
+ * struct nsh_ctx - Keeps track of NSH context data
+ * @npc: NSH network platform context
+ * @nsc: NSH network shared context
+ * @spc: NSH service platform context
+ * @ssc: NSH service shared context
+ */
+struct nsh_ctx {
+       __be32 npc;
+       __be32 nsc;
+       __be32 spc;
+       __be32 ssc;
+};
+
+/**
+ * struct nshhdr - Network Service Header
+ * @b: Network Service Base Header.
+ * @c: Network Service Context Header.
+ */
+struct nshhdr {
+       struct nsh_base b;
+       struct nsh_ctx c;
+};
+
+
+#define ETH_P_NSH      0x894F /* Ethertype for NSH */
+#define NSH_P_TEB      0x6558 /* Transparent Ethernet Bridging */
+#define NSH_DST_PORT   6633   /* UDP Port for NSH on VXLAN */
+
+
+#endif /* nsh.h */
diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h
index 3ac816b..1c15dfb 100644
--- a/datapath/linux/compat/include/net/vxlan.h
+++ b/datapath/linux/compat/include/net/vxlan.h
@@ -4,9 +4,11 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/udp.h>
+#include <net/nsh.h>
 
 struct vxlan_sock;
-typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, __be32 key);
+typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb,
+                          __be32 key, __be32 nsp);
 
 /* per UDP socket information */
 struct vxlan_sock {
@@ -27,7 +29,7 @@ void vxlan_sock_release(struct vxlan_sock *vs);
 int vxlan_xmit_skb(struct vxlan_sock *vs,
                   struct rtable *rt, struct sk_buff *skb,
                   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-                  __be16 src_port, __be16 dst_port, __be32 vni);
+                  __be16 src_port, __be16 dst_port, __be32 vni, __be32 nsp);
 
 __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb);
 
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
index 64877e0..0c3b9f9 100644
--- a/datapath/linux/compat/vxlan.c
+++ b/datapath/linux/compat/vxlan.c
@@ -50,6 +50,7 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/vxlan.h>
+#include <net/nsh.h>
 
 #include "compat.h"
 #include "datapath.h"
@@ -57,6 +58,9 @@
 #include "vlan.h"
 
 #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
+#define NSH_HLEN (sizeof(struct udphdr) + \
+                 sizeof(struct vxlanhdr) + \
+                 sizeof(struct nshhdr))
 
 #define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */
 
@@ -66,18 +70,35 @@ struct vxlanhdr {
        __be32 vx_vni;
 };
 
+static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
+{
+       return (struct vxlanhdr *)(udp_hdr(skb) + 1);
+}
+
+static inline struct nshhdr *nsh_hdr(const struct sk_buff *skb)
+{
+       return (struct nshhdr *)(vxlan_hdr(skb) + 1);
+}
+
 /* Callback from net/ipv4/udp.c to receive packets */
 static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
        struct vxlan_sock *vs;
        struct vxlanhdr *vxh;
+       struct udphdr *udp;
+       bool isnsh = false;
+       __be32 nsp = 0;
+
+       udp = (struct udphdr *)udp_hdr(skb);
+       if (udp->dest == htons(NSH_DST_PORT))
+               isnsh = true;
 
        /* Need Vxlan and inner Ethernet header to be present */
-       if (!pskb_may_pull(skb, VXLAN_HLEN))
+       if (!pskb_may_pull(skb, isnsh ? NSH_HLEN : VXLAN_HLEN))
                goto error;
 
        /* Return packets with reserved bits set */
-       vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
+       vxh = vxlan_hdr(skb);
        if (vxh->vx_flags != htonl(VXLAN_FLAGS) ||
            (vxh->vx_vni & htonl(0xff))) {
                pr_warn("invalid vxlan flags=%#x vni=%#x\n",
@@ -85,14 +106,26 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
                goto error;
        }
 
-       if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
+       if (isnsh) {
+               struct nshhdr *nsh = nsh_hdr(skb);
+
+               if (unlikely(nsh->b.svc_idx == 0)) {
+                       pr_warn("NSH service index reached zero\n");
+                       goto drop;
+               }
+
+               nsp = nsh->b.b2; /* same as svc_path | htonl(svc_idx) */
+       }
+
+       if (iptunnel_pull_header(skb, isnsh ? NSH_HLEN : VXLAN_HLEN,
+                                htons(ETH_P_TEB)))
                goto drop;
 
        vs = rcu_dereference_sk_user_data(sk);
        if (!vs)
                goto drop;
 
-       vs->rcv(vs, skb, vxh->vx_vni);
+       vs->rcv(vs, skb, vxh->vx_vni, nsp);
        return 0;
 
 drop:
@@ -176,15 +209,16 @@ static int handle_offloads(struct sk_buff *skb)
 int vxlan_xmit_skb(struct vxlan_sock *vs,
                   struct rtable *rt, struct sk_buff *skb,
                   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-                  __be16 src_port, __be16 dst_port, __be32 vni)
+                  __be16 src_port, __be16 dst_port, __be32 vni, __be32 nsp)
 {
+       bool isnsh = (dst_port == htons(NSH_DST_PORT));
        struct vxlanhdr *vxh;
        struct udphdr *uh;
        int min_headroom;
        int err;
 
        min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
-                       + VXLAN_HLEN + sizeof(struct iphdr)
+                       + (isnsh ? NSH_HLEN : VXLAN_HLEN) + sizeof(struct iphdr)
                        + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
 
        /* Need space for new headers (invalidates iph ptr) */
@@ -203,6 +237,21 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 
        skb_reset_inner_headers(skb);
 
+       if (isnsh) {
+               struct nshhdr *nsh;
+               uint8_t nsi = ntohl(nsp) & NSH_M_NSI;
+
+               nsh = (struct nshhdr *) __skb_push(skb, sizeof(*nsh));
+               nsh->b.o = 0;
+               nsh->b.res = 0;
+               nsh->b.proto = htons(NSH_P_TEB);
+               /* set b2 before svc_idx: b2 overlays svc_idx and would zero it */
+               nsh->b.b2 = nsp & htonl(NSH_M_NSP);
+               nsh->b.svc_idx = nsi ? nsi : 0x01;
+               nsh->b.c = 0;
+               memset(&nsh->c, 0x00, sizeof nsh->c);
+       }
+
        vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
        vxh->vx_flags = htonl(VXLAN_FLAGS);
        vxh->vx_vni = vni;
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index 8737b63..ca1dc3a 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -110,7 +110,8 @@ static int gre_rcv(struct sk_buff *skb,
                return PACKET_REJECT;
 
        key = key_to_tunnel_id(tpi->key, tpi->seq);
-       ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, filter_tnl_flags(tpi->flags));
+       ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, 0,
+                             filter_tnl_flags(tpi->flags));
 
        ovs_vport_receive(vport, skb, &tun_key);
        return PACKET_RCVD;
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
index c2698ae..6edb920 100644
--- a/datapath/vport-lisp.c
+++ b/datapath/vport-lisp.c
@@ -237,7 +237,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
 
        /* Save outer tunnel values */
        iph = ip_hdr(skb);
-       ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
+       ovs_flow_tun_key_init(&tun_key, iph, key, 0, TUNNEL_KEY);
 
        /* Drop non-IP inner packets */
        inner_iph = (struct iphdr *)(lisph + 1);
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
index ab2b6f7..999fdff 100644
--- a/datapath/vport-vxlan.c
+++ b/datapath/vport-vxlan.c
@@ -58,7 +58,8 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
 }
 
 /* Called with rcu_read_lock and BH disabled. */
-static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
+                     __be32 vx_vni, __be32 nsp)
 {
        struct ovs_key_ipv4_tunnel tun_key;
        struct vport *vport = vs->data;
@@ -68,7 +69,8 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
        /* Save outer tunnel values */
        iph = ip_hdr(skb);
        key = cpu_to_be64(ntohl(vx_vni) >> 8);
-       ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
+       ovs_flow_tun_key_init(&tun_key, iph, key, nsp,
+                             TUNNEL_KEY | TUNNEL_NSP);
 
        ovs_vport_receive(vport, skb, &tun_key);
 }
@@ -180,7 +182,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
                             OVS_CB(skb)->tun_key->ipv4_tos,
                             OVS_CB(skb)->tun_key->ipv4_ttl, df,
                             src_port, dst_port,
-                            htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
+                            htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8),
+                            OVS_CB(skb)->tun_key->nsp);
        if (err < 0)
                ip_rt_put(rt);
 error:
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index d1ff5ec..194acfe 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -328,6 +328,7 @@ enum ovs_tunnel_key_attr {
        OVS_TUNNEL_KEY_ATTR_TTL,                /* u8 Tunnel IP TTL. */
        OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT,      /* No argument, set DF. */
        OVS_TUNNEL_KEY_ATTR_CSUM,               /* No argument. CSUM packet. */
+       OVS_TUNNEL_KEY_ATTR_NSP,                /* be32 NSH svc path (lower 24 bits) */
        __OVS_TUNNEL_KEY_ATTR_MAX
 };
 
-- 
1.7.9.5

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to