This patch adds support for Network Service Headers (NSH) over VXLAN,
as described in [1]. The datapath is changed to add NSH headers on any
VXLAN port whose destination UDP port is 9030. IANA has not yet
allocated a port for NSH over VXLAN.

[1] http://tools.ietf.org/html/draft-quinn-nsh-01

Signed-off-by: pritesh <pritesh.koth...@cisco.com>

 create mode 100644 datapath/linux/compat/include/net/nsh.h

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 4defcdb..285b571 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -369,6 +369,7 @@ static size_t key_attr_size(void)
                  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
                  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
                  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
+                 + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_NSP */
                + nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
                + nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
                + nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
diff --git a/datapath/flow.c b/datapath/flow.c
index 29122af..4f47a48 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -1235,6 +1235,7 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
        int rem;
        bool ttl = false;
        __be16 tun_flags = 0;
+       __be32 nsp = 0;
 
        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);
@@ -1246,6 +1247,7 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
                        [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
                        [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
                        [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+                       [OVS_TUNNEL_KEY_ATTR_NSP] = sizeof(u32),
                };
 
                if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
@@ -1290,11 +1292,16 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
                case OVS_TUNNEL_KEY_ATTR_CSUM:
                        tun_flags |= TUNNEL_CSUM;
                        break;
+               case OVS_TUNNEL_KEY_ATTR_NSP:
+                       nsp |= htonl(be32_to_cpu(nla_get_be32(a)) << 8);
+                       tun_flags |= TUNNEL_NSP;
+                       break;
                default:
                        return -EINVAL;
                }
        }
 
+       SW_FLOW_KEY_PUT(match, tun_key.nsp, nsp, is_mask);
        SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
 
        if (rem > 0) {
@@ -1322,6 +1329,7 @@ int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
                           const struct ovs_key_ipv4_tunnel *output)
 {
        struct nlattr *nla;
+       __be32 nsp = cpu_to_be32(ntohl(output->nsp) >> 8);
 
        nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
        if (!nla)
@@ -1347,6 +1355,9 @@ int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
        if ((output->tun_flags & TUNNEL_CSUM) &&
                nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
                return -EMSGSIZE;
+       if (output->tun_flags & TUNNEL_NSP &&
+           nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_NSP, nsp))
+               return -EMSGSIZE;
 
        nla_nest_end(skb, nla);
        return 0;
diff --git a/datapath/flow.h b/datapath/flow.h
index 03eae03..b316e0a 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -51,6 +51,7 @@ struct sw_flow_actions {
 
 struct ovs_key_ipv4_tunnel {
        __be64 tun_id;
+       __be32 nsp;
        __be32 ipv4_src;
        __be32 ipv4_dst;
        __be16 tun_flags;
@@ -60,9 +61,10 @@ struct ovs_key_ipv4_tunnel {
 
 static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
                                         const struct iphdr *iph, __be64 tun_id,
-                                        __be16 tun_flags)
+                                        __be32 nsp, __be16 tun_flags)
 {
        tun_key->tun_id = tun_id;
+       tun_key->nsp = nsp;
        tun_key->ipv4_src = iph->saddr;
        tun_key->ipv4_dst = iph->daddr;
        tun_key->ipv4_tos = iph->tos;
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 057e1d5..7e0acce 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -65,4 +65,5 @@ openvswitch_headers += \
        linux/compat/include/net/net_namespace.h \
        linux/compat/include/net/netlink.h \
        linux/compat/include/net/vxlan.h \
+       linux/compat/include/net/nsh.h \
        linux/compat/include/net/sctp/checksum.h
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
index a786aa9..a4aec4d 100644
--- a/datapath/linux/compat/include/net/ip_tunnels.h
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -20,6 +20,7 @@
 #define TUNNEL_VERSION __cpu_to_be16(0x40)
 #define TUNNEL_NO_KEY  __cpu_to_be16(0x80)
 #define TUNNEL_DONT_FRAGMENT   __cpu_to_be16(0x0100)
+#define TUNNEL_NSP     __cpu_to_be16(0x0200)
 
 struct tnl_ptk_info {
        __be16 flags;
diff --git a/datapath/linux/compat/include/net/nsh.h b/datapath/linux/compat/include/net/nsh.h
new file mode 100644
index 0000000..bd4d7fb
--- /dev/null
+++ b/datapath/linux/compat/include/net/nsh.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2013 Cisco Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef NSH_H
+#define NSH_H 1
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+
+/**
+ * struct nsh_base - Network Service Base Header.
+ * @o: Operations and Management Packet indicator bit
+ * @c: If this bit is set then one or more contexts are in use.
+ * @proto: IEEE Ethertypes to indicate the frame within.
+ * @svc_idx: TTL functionality and location within service path.
+ * @svc_path: To uniquely identify service path.
+ */
+struct nsh_base {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u8    res:6,
+               c:1,
+               o:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       __u8    o:1,
+               c:1,
+               res:6;
+#else
+#error "Bitfield Endianess not defined."
+#endif
+       __be16  proto;
+       __u8    svc_idx;
+       __be32  svc_path;
+}__attribute__((packed));
+
+/**
+ * struct nsh_ctx - Keeps track of NSH context data
+ * @npc: NSH network platform context
+ * @nsc: NSH network shared context
+ * @spc: NSH service platform context
+ * @ssc: NSH service shared context
+ */
+struct nsh_ctx {
+       __be32 npc;
+       __be32 nsc;
+       __be32 spc;
+       __be32 ssc;
+};
+
+/**
+ * struct nshhdr - Network Service header
+ * @b: Network Service Base Header.
+ * @c: Network Service Context Header.
+ */
+struct nshhdr {
+       struct nsh_base b;
+       struct nsh_ctx c;
+};
+
+
+/* NSH Header Length */
+#define NSH_HLEN (sizeof(struct udphdr) + \
+                 sizeof(struct vxlanhdr) + \
+                 sizeof(struct nshhdr))
+#define NSH_DST_PORT   9030   /* UDP Port for NSH on VXLAN */
+#define NSH_P_TEB      0x6558 /* Transparent Ethernet Bridging */
+#define NSH_M_NSP      0xFFFFFF00
+#define NSH_M_NSI      0x000000FF
+
+
+#endif /* nsh.h */
diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h
index 3ac816b..1c15dfb 100644
--- a/datapath/linux/compat/include/net/vxlan.h
+++ b/datapath/linux/compat/include/net/vxlan.h
@@ -4,9 +4,11 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/udp.h>
+#include <net/nsh.h>
 
 struct vxlan_sock;
-typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, __be32 key);
+typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb,
+                          __be32 key, __be32 nsp);
 
 /* per UDP socket information */
 struct vxlan_sock {
@@ -27,7 +29,7 @@ void vxlan_sock_release(struct vxlan_sock *vs);
 int vxlan_xmit_skb(struct vxlan_sock *vs,
                   struct rtable *rt, struct sk_buff *skb,
                   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-                  __be16 src_port, __be16 dst_port, __be32 vni);
+                  __be16 src_port, __be16 dst_port, __be32 vni, __be32 nsp);
 
 __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb);
 
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
index 4f7671b..8a6d864 100644
--- a/datapath/linux/compat/vxlan.c
+++ b/datapath/linux/compat/vxlan.c
@@ -50,6 +50,7 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/vxlan.h>
+#include <net/nsh.h>
 
 #include "compat.h"
 #include "gso.h"
@@ -89,6 +90,16 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port)
        return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
 }
 
+static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
+{
+       return (struct vxlanhdr *)(udp_hdr(skb) + 1);
+}
+
+static inline struct nshhdr *nsh_hdr(const struct sk_buff *skb)
+{
+       return (struct nshhdr *)(vxlan_hdr(skb) + 1);
+}
+
 /* Find VXLAN socket based on network namespace and UDP port */
 
 static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
@@ -107,13 +118,20 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
        struct vxlan_sock *vs;
        struct vxlanhdr *vxh;
+       struct udphdr *udp;
+       bool isnsh = false;
+       __be32 nsp = 0;
+
+       udp = (struct udphdr *)udp_hdr(skb);
+       if (udp->dest == htons(NSH_DST_PORT))
+               isnsh = true;
 
        /* Need Vxlan and inner Ethernet header to be present */
-       if (!pskb_may_pull(skb, VXLAN_HLEN))
+       if (!pskb_may_pull(skb, isnsh ? NSH_HLEN : VXLAN_HLEN))
                goto error;
 
        /* Return packets with reserved bits set */
-       vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
+       vxh = vxlan_hdr(skb);
        if (vxh->vx_flags != htonl(VXLAN_FLAGS) ||
            (vxh->vx_vni & htonl(0xff))) {
                pr_warn("invalid vxlan flags=%#x vni=%#x\n",
@@ -121,14 +139,32 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
                goto error;
        }
 
-       if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
+       if (isnsh) {
+               struct nshhdr *nsh = nsh_hdr(skb);
+
+               if (unlikely(nsh->b.svc_idx == 0)) {
+                       pr_warn("NSH service index reached zero\n");
+                       goto drop;
+               }
+
+               if (unlikely(nsh->b.svc_path & htonl(NSH_M_NSI))) {
+                       pr_warn("invalid NSH service path=%#x\n",
+                                       ntohl(nsh->b.svc_path));
+                       goto drop;
+               }
+
+               nsp = nsh->b.svc_path | htonl(nsh->b.svc_idx);
+       }
+
+       if (iptunnel_pull_header(skb, isnsh ? NSH_HLEN : VXLAN_HLEN,
+                                htons(ETH_P_TEB)))
                goto drop;
 
        vs = vxlan_find_sock(sock_net(sk), inet_sport(sk));
        if (!vs)
                goto drop;
 
-       vs->rcv(vs, skb, vxh->vx_vni);
+       vs->rcv(vs, skb, vxh->vx_vni, nsp);
        return 0;
 
 drop:
@@ -212,8 +248,9 @@ static int handle_offloads(struct sk_buff *skb)
 int vxlan_xmit_skb(struct vxlan_sock *vs,
                   struct rtable *rt, struct sk_buff *skb,
                   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-                  __be16 src_port, __be16 dst_port, __be32 vni)
+                  __be16 src_port, __be16 dst_port, __be32 vni, __be32 nsp)
 {
+       bool isnsh = (dst_port == htons(NSH_DST_PORT));
        struct vxlanhdr *vxh;
        struct udphdr *uh;
        int min_headroom;
@@ -222,7 +259,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
        skb_reset_inner_headers(skb);
 
        min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
-                       + VXLAN_HLEN + sizeof(struct iphdr)
+                       + (isnsh ? NSH_HLEN : VXLAN_HLEN) + sizeof(struct iphdr)
                        + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
 
        /* Need space for new headers (invalidates iph ptr) */
@@ -239,6 +276,20 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
                vlan_set_tci(skb, 0);
        }
 
+       if (isnsh) {
+               struct nshhdr *nsh;
+               uint8_t nsi = ntohl(nsp) & NSH_M_NSI;
+
+               nsh = (struct nshhdr *) __skb_push(skb, sizeof(*nsh));
+               nsh->b.o = 0;
+               nsh->b.res = 0;
+               nsh->b.svc_idx = nsi ? nsi : 0x01;
+               nsh->b.proto = htons(NSH_P_TEB);
+               nsh->b.svc_path = nsp & htonl(NSH_M_NSP);
+               nsh->b.c = 0;
+               memset(&nsh->c, 0x00, sizeof nsh->c);
+       }
+
        vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
        vxh->vx_flags = htonl(VXLAN_FLAGS);
        vxh->vx_vni = vni;
diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
index b6c1d6f..139fe17 100644
--- a/datapath/vport-gre.c
+++ b/datapath/vport-gre.c
@@ -112,7 +112,8 @@ static int gre_rcv(struct sk_buff *skb,
                return PACKET_REJECT;
 
        key = key_to_tunnel_id(tpi->key, tpi->seq);
-       ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, filter_tnl_flags(tpi->flags));
+       ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, 0,
+                             filter_tnl_flags(tpi->flags));
 
        ovs_vport_receive(vport, skb, &tun_key);
        return PACKET_RCVD;
diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c
index e4e603f..77df084 100644
--- a/datapath/vport-lisp.c
+++ b/datapath/vport-lisp.c
@@ -232,7 +232,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
 
        /* Save outer tunnel values */
        iph = ip_hdr(skb);
-       ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
+       ovs_flow_tun_key_init(&tun_key, iph, key, 0, TUNNEL_KEY);
 
        /* Drop non-IP inner packets */
        inner_iph = (struct iphdr *)(lisph + 1);
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
index 3401dfd..c5d1b5a 100644
--- a/datapath/vport-vxlan.c
+++ b/datapath/vport-vxlan.c
@@ -59,7 +59,8 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
 }
 
 /* Called with rcu_read_lock and BH disabled. */
-static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
+                     __be32 vx_vni, __be32 nsp)
 {
        struct ovs_key_ipv4_tunnel tun_key;
        struct vport *vport = vs->data;
@@ -69,7 +70,8 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
        /* Save outer tunnel values */
        iph = ip_hdr(skb);
        key = cpu_to_be64(ntohl(vx_vni) >> 8);
-       ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
+       ovs_flow_tun_key_init(&tun_key, iph, key, nsp,
+                             TUNNEL_KEY | TUNNEL_NSP);
 
        ovs_vport_receive(vport, skb, &tun_key);
 }
@@ -181,7 +183,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
                             OVS_CB(skb)->tun_key->ipv4_tos,
                             OVS_CB(skb)->tun_key->ipv4_ttl, df,
                             src_port, dst_port,
-                            htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
+                            htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8),
+                            OVS_CB(skb)->tun_key->nsp);
        if (err < 0)
                ip_rt_put(rt);
 error:
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index 09c26b5..6239400 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -304,6 +304,7 @@ enum ovs_tunnel_key_attr {
        OVS_TUNNEL_KEY_ATTR_TTL,                /* u8 Tunnel IP TTL. */
        OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT,      /* No argument, set DF. */
        OVS_TUNNEL_KEY_ATTR_CSUM,               /* No argument. CSUM packet. */
+       OVS_TUNNEL_KEY_ATTR_NSP,                /* be32 NSH service path */
        __OVS_TUNNEL_KEY_ATTR_MAX
 };
 
-- 
1.7.9.5

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to