Support encap/decap of Network Service Header (NSH) as defined in
https://tools.ietf.org/html/draft-ietf-sfc-nsh-01

Includes support for Type 1 and Type 2 metadata and a simple registration
for listeners to see decapsulated packets based on the Type/Class.

Signed-off-by: Brian Russell <bruss...@brocade.com>
---
 include/net/nsh.h             | 158 ++++++++++++++++++
 include/uapi/linux/if_ether.h |   1 +
 net/ipv4/Kconfig              |  10 ++
 net/ipv4/Makefile             |   1 +
 net/ipv4/nsh.c                | 362 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 532 insertions(+)
 create mode 100644 include/net/nsh.h
 create mode 100644 net/ipv4/nsh.c

diff --git a/include/net/nsh.h b/include/net/nsh.h
new file mode 100644
index 0000000..7a5fb95
--- /dev/null
+++ b/include/net/nsh.h
@@ -0,0 +1,158 @@
+/*
+ * Network Service Header (NSH) inserted onto encapsulated packets
+ * or frames to realize service function paths.
+ * NSH also provides a mechanism for metadata exchange along the
+ * instantiated service path.
+ *
+ * https://tools.ietf.org/html/draft-ietf-sfc-nsh-01
+ *
+ * Copyright (c) 2015 by Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+#ifndef __NET_NSH_H
+#define __NET_NSH_H
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+
+/*
+ * NSH Base Header + Service Path Header
+ *
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |Ver|O|C|R|R|R|R|R|R|   Length  |    MD Type    | Next Protocol |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |          Service Path ID                      | Service Index |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Ver - Version, set to 0
+ * O - Indicates payload is OAM.
+ * C - Indicates critical metadata TLV is present (must be 0 for MD type 1).
+ * Length - total header length in 4-byte words.
+ * MD Type - Metadata type
+ *           Type 1 - 4 mandatory 4 byte context headers.
+ *           Type 2 - 0 or more var length context headers.
+ * Next Protocol - protocol type of original packet.
+ * Service Path ID (SPI) - identifies a service path. Participating nodes
+ *                         MUST use this identifier for Service Function
+ *                         Path selection.
+ * Service Index (SI) - provides location within the SFP.
+ */
+#define NSH_BF_VER0     0       /* the only version nsh_decap() accepts */
+#define NSH_BF_VER_MASK 0xc0    /* Ver: top two bits of base_flags */
+#define NSH_BF_OAM      BIT(5)  /* O flag within base_flags */
+#define NSH_BF_CRIT     BIT(4)  /* C flag within base_flags */
+#define NSH_N_SPI       (1u << 24)      /* SPI is 24 bits wide */
+#define NSH_SPI_MASK    ((NSH_N_SPI-1) << 8)    /* SPI: upper 24 bits of the host-order sp_header */
+#define NSH_N_SI        (1u << 8)       /* SI is 8 bits wide */
+#define NSH_SI_MASK     (NSH_N_SI-1)    /* SI: low 8 bits of the host-order sp_header */
+
+#define NSH_MD_TYPE_1   1       /* fixed layout: four mandatory context headers */
+#define NSH_MD_TYPE_2   2       /* zero or more variable-length TLVs */
+
+#define NSH_NEXT_PROTO_IPv4 1   /* nsh_decap() maps this to ETH_P_IP */
+#define NSH_NEXT_PROTO_IPv6 2   /* nsh_decap() maps this to ETH_P_IPV6 */
+#define NSH_NEXT_PROTO_ETH  3   /* nsh_decap() maps this to ETH_P_TEB */
+
+#define NSH_LEN_TYPE_1     6    /* words: base + service path + four context headers */
+#define NSH_LEN_TYPE_2_MIN 2    /* words: base + service path, no TLVs */
+
+struct nsh_base {
+       __u8 base_flags;        /* Ver, O and C bits (NSH_BF_*); remaining bits reserved */
+       __u8 length;            /* total NSH length in 4-byte words; the diagram shows the top two bits of this byte as reserved */
+       __u8 md_type;           /* NSH_MD_TYPE_1 or NSH_MD_TYPE_2 */
+       __u8 next_proto;        /* NSH_NEXT_PROTO_* of the encapsulated payload */
+};
+
+struct nsh_header {
+       struct nsh_base base;   /* first 4-byte word on the wire */
+       __be32 sp_header;       /* network order: (SPI << 8) | SI */
+};
+
+/*
+ * When the Base Header specifies MD Type 1, four 4-byte Context Headers
+ * MUST be added immediately following the Service Path Header. Thus length
+ * in the base header is set to 6.
+ * Context Headers that carry no metadata MUST be set to zero.
+ */
+#define NSH_MD_TYPE_1_NUM_HDRS 4        /* number of __be32 fields in struct nsh_md_type_1 */
+
+struct nsh_md_type_1 {
+       __be32 ctx_hdr1;        /* nsh.c walks all four fields as one u32 array starting here */
+       __be32 ctx_hdr2;
+       __be32 ctx_hdr3;
+       __be32 ctx_hdr4;
+};
+
+/*
+ * When the Base Header specifies MD Type 2, zero or more variable
+ * length Context Headers follow the Service Path Header.
+ *
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |          TLV Class            |C|    Type     |R|R|R|   Len   |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |                      Variable Metadata                        |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * TLV Class - Scope of class (e.g. may be vendor or standards body).
+ * Type - Specific type of information within the scope of given class.
+ *        C bit (MSB) indicates criticality. When set, receiver must process.
+ * Len - Length of variable metadata in 4-byte words.
+ */
+#define NSH_TYPE_CRIT BIT(7)    /* C bit, carried in the MSB of tlv_type */
+
+struct nsh_md_type_2 {
+       __be16 tlv_class;       /* network order TLV class */
+       __u8 tlv_type;          /* C bit (NSH_TYPE_CRIT) | 7-bit type */
+       __u8 length;            /* metadata words that follow this 4-byte TLV header; the diagram shows the top three bits as reserved */
+};
+
+/*
+ * Context header for encap/decap.
+ */
+#define NSH_MD_CLASS_TYPE_1 USHRT_MAX   /* class value that marks an entry as MD Type 1 */
+#define NSH_MD_TYPE_TYPE_1  U8_MAX      /* type value reported for MD Type 1 on decap */
+#define NSH_MD_LEN_TYPE_1   4           /* MD Type 1 always carries four words */
+
+struct nsh_metadata {
+       u_short class;  /* host-order TLV class, or NSH_MD_CLASS_TYPE_1 */
+       u_char crit;    /* non-zero: the TLV's C bit was set (decap) or is to be set (encap) */
+       u_char type;    /* TLV type; decap strips the C bit before storing it here */
+       u_int len;  /* 4 byte words */
+       void *data;     /* host-order words; on decap this points into the packet buffer */
+};
+
+/*
+ * Parse NSH header and notify registered listeners about any metadata.
+ *
+ * skb->data must point at the NSH base header; the header is pulled from
+ * the skb and skb->protocol is set from the Next Protocol field.
+ * spi, si and np are optional outputs: each may be NULL, otherwise it
+ * receives the Service Path ID, Service Index and Next Protocol value.
+ *
+ * Returns the NSH length in 4-byte words, or a negative errno (including
+ * any negative value returned by a listener's notify hook).
+ */
+int nsh_decap(struct sk_buff *skb,
+             u_int *spi,
+             u_char *si,
+             u_char *np);
+
+/*
+ * Add NSH header.
+ *
+ * spi must be below NSH_N_SPI and np must be one of NSH_NEXT_PROTO_*.
+ * ctx_hdrs holds num_ctx_hdrs entries: either exactly one entry of class
+ * NSH_MD_CLASS_TYPE_1 (emitted as MD Type 1) or any number of other-class
+ * entries (emitted as MD Type 2 TLVs). Entry data is in host byte order.
+ *
+ * Returns 0 on success, -EINVAL for bad arguments, or the error from
+ * skb_cow_head() when headroom cannot be made available.
+ */
+int nsh_encap(struct sk_buff *skb,
+             u_int spi,
+             u_char si,
+             u_char np,
+             u_int num_ctx_hdrs,
+             struct nsh_metadata *ctx_hdrs);
+
+
+/* Register hooks to be informed of nsh metadata of specified class */
+struct nsh_listener {
+       struct list_head list;  /* linkage on the listener list kept in nsh.c */
+       u_short class;          /* notify fires when any decapsulated header has this class */
+       u_char max_ctx_hdrs;    /* registration fails if this exceeds the nsh_hdrs module limit */
+       int (*notify)(struct sk_buff *skb,
+                     u_int service_path_id,
+                     u_char service_index,
+                     u_char next_proto,
+                     struct nsh_metadata *ctx_hdrs,
+                     u_int num_ctx_hdrs);      /* receives every header of the packet; <0 aborts the decap */
+};
+
+int nsh_register_listener(struct nsh_listener *listener);       /* 0, or -ENOMEM if max_ctx_hdrs exceeds the limit */
+int nsh_unregister_listener(struct nsh_listener *listener);     /* always returns 0 */
+#endif /* __NET_NSH_H */
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index ea9221b..eb512b1 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -91,6 +91,7 @@
 #define ETH_P_TDLS     0x890D          /* TDLS */
 #define ETH_P_FIP      0x8914          /* FCoE Initialization Protocol */
 #define ETH_P_80221    0x8917          /* IEEE 802.21 Media Independent 
Handover Protocol */
+#define ETH_P_NSH       0x894F          /* Network Service Header */
 #define ETH_P_LOOPBACK 0x9000          /* Ethernet loopback packet, per IEEE 
802.3 */
 #define ETH_P_QINQ1    0x9100          /* deprecated QinQ VLAN [ NOT AN 
OFFICIALLY REGISTERED ID ] */
 #define ETH_P_QINQ2    0x9200          /* deprecated QinQ VLAN [ NOT AN 
OFFICIALLY REGISTERED ID ] */
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 7758247..37c8c23 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -212,6 +212,16 @@ config NET_IPGRE_BROADCAST
          Network), but can be distributed all over the Internet. If you want
          to do that, say Y here and to "IP multicast routing" below.
 
+config NET_NSH
+        tristate 'Network Service Header Encapsulation'
+        help
+          Network Service Header (NSH) inserted onto
+          encapsulated packets or frames to realize service function paths.
+          NSH also provides a mechanism for metadata exchange along the
+          instantiated service path.
+
+          To compile it as a module, choose M here.  If unsure, say N.
+
 config IP_MROUTE
        bool "IP: multicast routing"
        depends on IP_MULTICAST
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 62c049b..46d65f8 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -24,6 +24,7 @@ gre-y := gre_demux.o
 obj-$(CONFIG_NET_FOU) += fou.o
 obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
 obj-$(CONFIG_NET_IPGRE) += ip_gre.o
+obj-$(CONFIG_NET_NSH) += nsh.o
 obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o
 obj-$(CONFIG_NET_IPVTI) += ip_vti.o
 obj-$(CONFIG_SYN_COOKIES) += syncookies.o
diff --git a/net/ipv4/nsh.c b/net/ipv4/nsh.c
new file mode 100644
index 0000000..70e5ef0
--- /dev/null
+++ b/net/ipv4/nsh.c
@@ -0,0 +1,362 @@
+/*
+ * Network Service Header (NSH) inserted onto encapsulated packets
+ * or frames to realize service function paths.
+ * NSH also provides a mechanism for metadata exchange along the
+ * instantiated service path.
+ *
+ * https://tools.ietf.org/html/draft-ietf-sfc-nsh-01
+ *
+ * Copyright (c) 2015 by Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+#include <linux/module.h>
+#include <net/nsh.h>
+
+static struct list_head nsh_listeners;  /* registered listeners; initialised in nsh_init() */
+static DEFINE_MUTEX(nsh_listener_mutex);        /* guards nsh_listeners for add, delete and traversal */
+static struct nsh_metadata *decap_ctx_hdrs;     /* scratch array of limit_ctx_hdrs entries shared by every nsh_decap() call; NOTE(review): no lock around it is visible here */
+static u_char limit_ctx_hdrs = 10;      /* capacity of decap_ctx_hdrs; read-only "nsh_hdrs" module parameter */
+module_param_named(nsh_hdrs, limit_ctx_hdrs, byte, 0444);
+MODULE_PARM_DESC(nsh_hdrs, "Maximum NSH metadata headers per packet");
+
+/* Add a listener to the notification list.
+ *
+ * A listener asking for more context headers than the module-wide limit
+ * is refused with -ENOMEM; otherwise it is linked at the head of the list
+ * under the listener mutex and 0 is returned.
+ */
+int nsh_register_listener(struct nsh_listener *listener)
+{
+       int ret = -ENOMEM;
+
+       if (listener->max_ctx_hdrs <= limit_ctx_hdrs) {
+               mutex_lock(&nsh_listener_mutex);
+               list_add(&listener->list, &nsh_listeners);
+               mutex_unlock(&nsh_listener_mutex);
+               ret = 0;
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(nsh_register_listener);
+
+int nsh_unregister_listener(struct nsh_listener *listener)
+{
+       mutex_lock(&nsh_listener_mutex);        /* same lock notify_listeners() holds while walking the list */
+       list_del(&listener->list);              /* listener must currently be on the list */
+       mutex_unlock(&nsh_listener_mutex);
+       return 0;                               /* no failure path */
+}
+EXPORT_SYMBOL(nsh_unregister_listener);
+
+/* Offer the decapsulated metadata to every interested listener.
+ *
+ * A listener is interested when at least one context header carries its
+ * class; it is then called once with the complete header array. The walk
+ * stops at the first hook returning a negative value, which is passed
+ * back to the caller. Returns 0 otherwise.
+ */
+static int
+notify_listeners(struct sk_buff *skb,
+                u_int service_path_id,
+                u_char service_index,
+                u_char next_proto,
+                struct nsh_metadata *ctx_hdrs,
+                u_int num_ctx_hdrs)
+{
+       struct nsh_listener *cur;
+       u_int idx;
+       int ret;
+
+       mutex_lock(&nsh_listener_mutex);
+       list_for_each_entry(cur, &nsh_listeners, list) {
+               /* Find the first header of this listener's class, if any. */
+               idx = 0;
+               while (idx < num_ctx_hdrs && ctx_hdrs[idx].class != cur->class)
+                       idx++;
+               if (idx == num_ctx_hdrs)
+                       continue;
+
+               ret = cur->notify(skb, service_path_id, service_index,
+                                 next_proto, ctx_hdrs, num_ctx_hdrs);
+               if (ret < 0)
+                       goto unlock;
+       }
+       /* Non-negative hook results are not propagated. */
+       ret = 0;
+unlock:
+       mutex_unlock(&nsh_listener_mutex);
+       return ret;
+}
+
+/* Describe MD Type 1 metadata as a single context-header entry.
+ *
+ * @skb:          packet being decapsulated (unused here).
+ * @md:           the four fixed context headers inside the packet.
+ * @max_ctx_hdrs: capacity of @ctx_hdrs; at least one slot is required.
+ * @ctx_hdrs:     caller-owned scratch array to fill.
+ * @num_ctx_hdrs: set to 1 on success.
+ *
+ * The four words are converted to host byte order in place, and the entry's
+ * data pointer refers to them inside the packet buffer.
+ *
+ * Returns 0, or -ENOMEM when @ctx_hdrs has no room.
+ */
+static int
+type_1_decap(struct sk_buff *skb,
+            struct nsh_md_type_1 *md,
+            u_int max_ctx_hdrs,
+            struct nsh_metadata *ctx_hdrs,
+            u_int *num_ctx_hdrs)
+{
+       u32 *data = (u32 *)&md->ctx_hdr1;
+       int i;
+
+       if (max_ctx_hdrs == 0)
+               return -ENOMEM;
+
+       ctx_hdrs[0].class = NSH_MD_CLASS_TYPE_1;
+       /* The scratch entry may still hold a previous packet's flag, and
+        * MD Type 1 has no per-header critical bit, so always clear it.
+        */
+       ctx_hdrs[0].crit = 0;
+       ctx_hdrs[0].type = NSH_MD_TYPE_TYPE_1;
+       ctx_hdrs[0].len = NSH_MD_LEN_TYPE_1;
+       ctx_hdrs[0].data = data;
+
+       for (i = 0; i < NSH_MD_TYPE_1_NUM_HDRS; i++)
+               data[i] = ntohl(data[i]);
+
+       *num_ctx_hdrs = 1;
+
+       return 0;
+}
+
+/* Walk the MD Type 2 TLVs and describe each one in @ctx_hdrs.
+ *
+ * @skb:          packet being decapsulated (unused here).
+ * @md:           first TLV header, directly after the service path header.
+ * @md_len:       size of the TLV area in 4-byte words.
+ * @max_ctx_hdrs: capacity of @ctx_hdrs.
+ * @ctx_hdrs:     caller-owned scratch array to fill.
+ * @num_ctx_hdrs: set to the number of TLVs found.
+ *
+ * TLV payload words are converted to host byte order in place, and each
+ * entry's data pointer refers to them inside the packet buffer.
+ *
+ * Returns 0, -ENOMEM when there are more TLVs than @ctx_hdrs can hold, or
+ * -EINVAL when a TLV claims more words than remain in the TLV area.
+ */
+static int
+type_2_decap(struct sk_buff *skb,
+            struct nsh_md_type_2 *md,
+            u_int md_len,
+            u_int max_ctx_hdrs,
+            struct nsh_metadata *ctx_hdrs,
+            u_int *num_ctx_hdrs)
+{
+       u32 *data;
+       u_int tlv_len;
+       u_int i = 0, j;
+
+       while (md_len > 0) {
+               /* Valid slots are 0 .. max_ctx_hdrs - 1. */
+               if (i >= max_ctx_hdrs)
+                       return -ENOMEM;
+
+               /* NOTE(review): the TLV diagram in <net/nsh.h> shows the
+                * top three bits of this byte as reserved; they are not
+                * masked here, matching what type_2_encap() emits.
+                */
+               tlv_len = md->length;
+
+               /* Account for the TLV header word, then make sure the
+                * payload fits in what is left. Without this check the
+                * unsigned md_len would wrap and the loop would run off
+                * the end of the NSH header.
+                */
+               md_len--;
+               if (tlv_len > md_len)
+                       return -EINVAL;
+
+               ctx_hdrs[i].class = ntohs(md->tlv_class);
+               ctx_hdrs[i].type = md->tlv_type & ~NSH_TYPE_CRIT;
+               /* Written on every pass: the scratch array is reused. */
+               ctx_hdrs[i].crit = (md->tlv_type & NSH_TYPE_CRIT) ? 1 : 0;
+               ctx_hdrs[i].len = tlv_len;
+
+               data = (u32 *)(md + 1);
+               ctx_hdrs[i].data = data;
+
+               for (j = 0; j < tlv_len; j++)
+                       data[j] = ntohl(data[j]);
+
+               md = (struct nsh_md_type_2 *)&data[tlv_len];
+               md_len -= tlv_len;
+               i++;
+       }
+       *num_ctx_hdrs = i;
+
+       return 0;
+}
+
+/* Parse NSH header.
+ *
+ * No additional memory is allocated. Context header data is pointed
+ * to in the buffer payload. Context headers and skb are passed to anyone
+ * who has registered interest in the class(es) of metadata received.
+ *
+ * @skb: packet with skb->data at the NSH base header. The whole NSH header
+ *       is pulled and skb->protocol is set from the Next Protocol field.
+ * @spi: optional; receives the Service Path ID.
+ * @si:  optional; receives the Service Index.
+ * @np:  optional; receives the Next Protocol value.
+ *
+ * Returns the total number of 4 byte words in the NSH headers, <0 on failure:
+ * -ENOMEM if the header is not fully present in the skb or there are more
+ * context headers than the module limit, -EINVAL for an unsupported
+ * version, next protocol, MD type or length, or the negative value
+ * returned by a listener.
+ */
+int nsh_decap(struct sk_buff *skb,
+             u_int *spi,
+             u_char *si,
+             u_char *np)
+{
+       struct nsh_header *nsh;
+       struct nsh_base *base;
+       u_int max_ctx_hdrs = limit_ctx_hdrs;
+       u_int num_ctx_hdrs;
+       u_int service_path_id;
+       u_char service_index;
+       u_char next_proto;
+       u32 sph;
+       u_char md_type;
+       u_char hdrlen; /* 4 byte words */
+       u_int len; /* bytes */
+       int err;
+
+       /* The fixed part must be in the linear area before its length
+        * byte can be trusted.
+        */
+       if (unlikely(!pskb_may_pull(skb, sizeof(*nsh))))
+               return -ENOMEM;
+
+       nsh = (struct nsh_header *)skb->data;
+       hdrlen = nsh->base.length;
+       len = hdrlen * sizeof(u32);
+
+       if (unlikely(!pskb_may_pull(skb, len)))
+               return -ENOMEM;
+
+       /* pskb_may_pull() may have moved the header data, so take the
+        * pointers again before dereferencing them.
+        */
+       nsh = (struct nsh_header *)skb->data;
+       base = &nsh->base;
+
+       skb_pull_rcsum(skb, len);
+
+       if (((base->base_flags & NSH_BF_VER_MASK) >> 6) != NSH_BF_VER0)
+               return -EINVAL;
+
+       next_proto = base->next_proto;
+
+       switch (next_proto) {
+       case NSH_NEXT_PROTO_IPv4:
+               skb->protocol = htons(ETH_P_IP);
+               break;
+       case NSH_NEXT_PROTO_IPv6:
+               skb->protocol = htons(ETH_P_IPV6);
+               break;
+       case NSH_NEXT_PROTO_ETH:
+               skb->protocol = htons(ETH_P_TEB);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (np)
+               *np = next_proto;
+
+       md_type = base->md_type;
+
+       /* Metadata starts right after the fixed header. nsh itself is
+        * left untouched because sp_header is still read from it below.
+        */
+       switch (md_type) {
+       case NSH_MD_TYPE_1:
+               if (hdrlen != NSH_LEN_TYPE_1)
+                       return -EINVAL;
+               err = type_1_decap(skb, (struct nsh_md_type_1 *)(nsh + 1),
+                                  max_ctx_hdrs, decap_ctx_hdrs, &num_ctx_hdrs);
+               break;
+       case NSH_MD_TYPE_2:
+               if (hdrlen < NSH_LEN_TYPE_2_MIN)
+                       return -EINVAL;
+               err = type_2_decap(skb, (struct nsh_md_type_2 *)(nsh + 1),
+                                  hdrlen - NSH_LEN_TYPE_2_MIN,
+                                  max_ctx_hdrs, decap_ctx_hdrs, &num_ctx_hdrs);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (err < 0)
+               return err;
+
+       sph = ntohl(nsh->sp_header);
+       service_path_id = (sph & NSH_SPI_MASK) >> 8;
+       service_index = sph & NSH_SI_MASK;
+
+       if (spi)
+               *spi = service_path_id;
+       if (si)
+               *si = service_index;
+
+       err = notify_listeners(skb, service_path_id,
+                              service_index, next_proto,
+                              decap_ctx_hdrs, num_ctx_hdrs);
+       if (err < 0)
+               return err;
+
+       return hdrlen;
+}
+EXPORT_SYMBOL_GPL(nsh_decap);
+
+/* Emit the four MD Type 1 context headers.
+ *
+ * Copies NSH_MD_TYPE_1_NUM_HDRS host-order words from ctx_hdrs[0].data to
+ * data_out, converting each to network byte order.
+ */
+static void
+type_1_encap(u32 *data_out,
+            struct nsh_metadata *ctx_hdrs)
+{
+       u32 *src = (u32 *)ctx_hdrs[0].data;
+       u32 *dst = data_out;
+       u32 *end = data_out + NSH_MD_TYPE_1_NUM_HDRS;
+
+       while (dst < end)
+               *dst++ = htonl(*src++);
+}
+
+/* Emit one MD Type 2 TLV per context header, back to back, starting at md.
+ *
+ * Each TLV is a 4-byte header (class in network order, type with the C bit
+ * OR-ed in when crit is set, length in words) followed by len payload
+ * words converted from host to network byte order.
+ */
+static void
+type_2_encap(struct nsh_md_type_2 *md,
+            u_int num_ctx_hdrs,
+            struct nsh_metadata *ctx_hdrs)
+{
+       struct nsh_md_type_2 *tlv = md;
+       struct nsh_metadata *hdr;
+       u32 *src, *dst;
+       int n, w;
+
+       for (n = 0; n < num_ctx_hdrs; n++) {
+               hdr = &ctx_hdrs[n];
+
+               tlv->tlv_class = htons(hdr->class);
+               tlv->tlv_type = hdr->crit ? (hdr->type | NSH_TYPE_CRIT)
+                                         : hdr->type;
+               tlv->length = hdr->len;
+
+               /* Payload follows the TLV header word. */
+               dst = (u32 *)(tlv + 1);
+               src = (u32 *)hdr->data;
+               for (w = 0; w < hdr->len; w++)
+                       dst[w] = htonl(src[w]);
+
+               /* The next TLV starts where this payload ended. */
+               tlv = (struct nsh_md_type_2 *)(dst + w);
+       }
+}
+
+/* Add NSH header.
+ */
+int nsh_encap(struct sk_buff *skb,
+             u_int spi,
+             u_char si,
+             u_char np,
+             u_int num_ctx_hdrs,
+             struct nsh_metadata *ctx_hdrs)
+{
+       bool has_t1 = false, has_t2 = false;
+       bool has_crit = false;
+       u_int headroom = sizeof(struct nsh_header);
+       struct nsh_header *nsh;
+       struct nsh_base *base;
+       int i;
+       int err;
+
+       if (np != NSH_NEXT_PROTO_IPv4 &&
+           np != NSH_NEXT_PROTO_IPv6 &&
+           np != NSH_NEXT_PROTO_ETH)
+               return -EINVAL;
+
+       if (spi >= NSH_N_SPI)
+               return -EINVAL;
+
+       for (i = 0; i < num_ctx_hdrs; i++) {
+               if (ctx_hdrs[i].class == NSH_MD_CLASS_TYPE_1) {
+                       if (num_ctx_hdrs != 1)
+                               return -EINVAL;
+                       headroom += NSH_MD_LEN_TYPE_1 * sizeof(u32);
+                       has_t1 |= true;
+               } else {
+                       headroom += ctx_hdrs[i].len * sizeof(u32) +
+                               sizeof(struct nsh_md_type_2);
+                       has_t2 |= true;
+                       has_crit |= ctx_hdrs[i].type & NSH_TYPE_CRIT;
+               }
+
+               if (has_t1 && has_t2)
+                       return -EINVAL;
+       }
+
+       err = skb_cow_head(skb, headroom);
+       if (err)
+               return err;
+
+       nsh = (struct nsh_header *)__skb_push(skb, headroom);
+
+       base = &nsh->base;
+       base->base_flags = has_crit ? NSH_BF_CRIT : 0; /* Ver 0, OAM 0 */
+       base->length = headroom / sizeof(u32);
+       base->md_type = has_t1 ? NSH_MD_TYPE_1 : NSH_MD_TYPE_2;
+       base->next_proto = np;
+
+       nsh->sp_header = htonl((spi << 8) | si);
+
+       if (has_t1)
+               type_1_encap((u32 *) ++nsh, ctx_hdrs);
+       else
+               type_2_encap((struct nsh_md_type_2 *) ++nsh, num_ctx_hdrs,
+                            ctx_hdrs);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nsh_encap);
+
+/* Module init: set up the empty listener list and allocate the scratch
+ * array that nsh_decap() fills with per-packet context headers.
+ *
+ * Returns 0, or -ENOMEM if the array cannot be allocated.
+ */
+static int __init nsh_init(void)
+{
+       INIT_LIST_HEAD(&nsh_listeners);
+
+       /* Zeroed so that no entry field ever exposes uninitialised heap
+        * contents to a listener.
+        */
+       decap_ctx_hdrs = kcalloc(limit_ctx_hdrs, sizeof(*decap_ctx_hdrs),
+                                GFP_KERNEL);
+       if (!decap_ctx_hdrs)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void __exit nsh_exit(void)
+{
+       kfree(decap_ctx_hdrs);  /* release the scratch array allocated in nsh_init(); the listener list is not touched */
+}
+
+module_init(nsh_init);  /* allocates the decap scratch array */
+module_exit(nsh_exit);  /* frees it again */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Brian Russell <bruss...@brocade.com>");
+MODULE_DESCRIPTION("Network Service Header Encap/Decap");
-- 
2.1.4

Reply via email to