This patch adds support for Network Service Headers (NSH) over VXLAN, as described in [1]. The datapath is changed to add NSH headers whenever a VXLAN port is created with destination port 9030. IANA has not yet allocated a port for NSH over VXLAN.
[1] http://tools.ietf.org/html/draft-quinn-nsh-01 Signed-off-by: pritesh <pritesh.koth...@cisco.com> create mode 100644 datapath/linux/compat/include/net/nsh.h diff --git a/datapath/datapath.c b/datapath/datapath.c index 4defcdb..285b571 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -369,6 +369,7 @@ static size_t key_attr_size(void) + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_NSP */ + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ diff --git a/datapath/flow.c b/datapath/flow.c index 29122af..4f47a48 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -1235,6 +1235,7 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, int rem; bool ttl = false; __be16 tun_flags = 0; + __be32 nsp = 0; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); @@ -1246,6 +1247,7 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, [OVS_TUNNEL_KEY_ATTR_TTL] = 1, [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, + [OVS_TUNNEL_KEY_ATTR_NSP] = sizeof(u32), }; if (type > OVS_TUNNEL_KEY_ATTR_MAX) { @@ -1290,11 +1292,16 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_CSUM: tun_flags |= TUNNEL_CSUM; break; + case OVS_TUNNEL_KEY_ATTR_NSP: + nsp |= htonl(be32_to_cpu(nla_get_be32(a)) << 8); + tun_flags |= TUNNEL_NSP; + break; default: return -EINVAL; } } + SW_FLOW_KEY_PUT(match, tun_key.nsp, nsp, is_mask); SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); if (rem > 0) { @@ -1322,6 +1329,7 @@ int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *output) { struct nlattr *nla; + __be32 nsp = cpu_to_be32(ntohl(output->nsp) >> 8); nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); if (!nla) @@ -1347,6 +1355,9 @@ int 
ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_CSUM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) return -EMSGSIZE; + if (output->tun_flags & TUNNEL_NSP && + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_NSP, nsp)) + return -EMSGSIZE; nla_nest_end(skb, nla); return 0; diff --git a/datapath/flow.h b/datapath/flow.h index 03eae03..b316e0a 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -51,6 +51,7 @@ struct sw_flow_actions { struct ovs_key_ipv4_tunnel { __be64 tun_id; + __be32 nsp; __be32 ipv4_src; __be32 ipv4_dst; __be16 tun_flags; @@ -60,9 +61,10 @@ struct ovs_key_ipv4_tunnel { static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, const struct iphdr *iph, __be64 tun_id, - __be16 tun_flags) + __be32 nsp, __be16 tun_flags) { tun_key->tun_id = tun_id; + tun_key->nsp = nsp; tun_key->ipv4_src = iph->saddr; tun_key->ipv4_dst = iph->daddr; tun_key->ipv4_tos = iph->tos; diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk index 057e1d5..7e0acce 100644 --- a/datapath/linux/Modules.mk +++ b/datapath/linux/Modules.mk @@ -65,4 +65,5 @@ openvswitch_headers += \ linux/compat/include/net/net_namespace.h \ linux/compat/include/net/netlink.h \ linux/compat/include/net/vxlan.h \ + linux/compat/include/net/nsh.h \ linux/compat/include/net/sctp/checksum.h diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h index a786aa9..a4aec4d 100644 --- a/datapath/linux/compat/include/net/ip_tunnels.h +++ b/datapath/linux/compat/include/net/ip_tunnels.h @@ -20,6 +20,7 @@ #define TUNNEL_VERSION __cpu_to_be16(0x40) #define TUNNEL_NO_KEY __cpu_to_be16(0x80) #define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) +#define TUNNEL_NSP __cpu_to_be16(0x0200) struct tnl_ptk_info { __be16 flags; diff --git a/datapath/linux/compat/include/net/nsh.h b/datapath/linux/compat/include/net/nsh.h new file mode 100644 index 0000000..bd4d7fb --- /dev/null +++ b/datapath/linux/compat/include/net/nsh.h 
@@ -0,0 +1,86 @@ +/* + * Copyright (c) 2013 Cisco Systems, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef NSH_H +#define NSH_H 1 + +#include <linux/types.h> +#include <asm/byteorder.h> + + +/** + * struct nsh_bhdr - Network Service Base Header. + * @o: Operations and Management Packet indicator bit + * @c: If this bit is set then one or more contexts are in use. + * @proto: IEEE Ethertypes to indicate the frame within. + * @svc_idx: TTL functionality and location within service path. + * @svc_path: To uniquely identify service path. + */ +struct nsh_base { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 res:6, + c:1, + o:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 o:1, + c:1, + res:6; +#else +#error "Bitfield Endianess not defined." +#endif + __be16 proto; + __u8 svc_idx; + __be32 svc_path; +}__attribute__((packed)); + +/** + * struct nsh_ctx - Keeps track of NSH context data + * @npc: NSH network platform context + * @nsc: NSH network shared context + * @spc: NSH service platform context + * @ssc: NSH service shared context + */ +struct nsh_ctx { + __be32 npc; + __be32 nsc; + __be32 spc; + __be32 ssc; +}; + +/** + * struct nshdr - Network Service header + * @nsh_base: Network Service Base Header. + * @nsh_ctx: Network Service Context Header. 
+ */ +struct nshhdr { + struct nsh_base b; + struct nsh_ctx c; +}; + + +/* NSH Header Length */ +#define NSH_HLEN (sizeof(struct udphdr) + \ + sizeof(struct vxlanhdr) + \ + sizeof(struct nshhdr)) +#define NSH_DST_PORT 9030 /* UDP Port for NSH on VXLAN */ +#define NSH_P_TEB 0x6558 /* Transparent Ethernet Bridging */ +#define NSH_M_NSP 0xFFFFFF00 +#define NSH_M_NSI 0x000000FF + + +#endif /* nsh.h */ diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h index 3ac816b..1c15dfb 100644 --- a/datapath/linux/compat/include/net/vxlan.h +++ b/datapath/linux/compat/include/net/vxlan.h @@ -4,9 +4,11 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/udp.h> +#include <net/nsh.h> struct vxlan_sock; -typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, __be32 key); +typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, + __be32 key, __be32 nsp); /* per UDP socket information */ struct vxlan_sock { @@ -27,7 +29,7 @@ void vxlan_sock_release(struct vxlan_sock *vs); int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni); + __be16 src_port, __be16 dst_port, __be32 vni, __be32 nsp); __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb); diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c index 4f7671b..8a6d864 100644 --- a/datapath/linux/compat/vxlan.c +++ b/datapath/linux/compat/vxlan.c @@ -50,6 +50,7 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/vxlan.h> +#include <net/nsh.h> #include "compat.h" #include "gso.h" @@ -89,6 +90,16 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port) return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; } +static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb) +{ + return (struct vxlanhdr *)(udp_hdr(skb) + 1); 
+} + +static inline struct nshhdr *nsh_hdr(const struct sk_buff *skb) +{ + return (struct nshhdr *)(vxlan_hdr(skb) + 1); +} + /* Find VXLAN socket based on network namespace and UDP port */ static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port) @@ -107,13 +118,20 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct vxlan_sock *vs; struct vxlanhdr *vxh; + struct udphdr *udp; + bool isnsh = false; + __be32 nsp = 0; + + udp = (struct udphdr *)udp_hdr(skb); + if (udp->dest == htons(NSH_DST_PORT)) + isnsh = true; /* Need Vxlan and inner Ethernet header to be present */ - if (!pskb_may_pull(skb, VXLAN_HLEN)) + if (!pskb_may_pull(skb, isnsh ? NSH_HLEN : VXLAN_HLEN)) goto error; /* Return packets with reserved bits set */ - vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); + vxh = vxlan_hdr(skb); if (vxh->vx_flags != htonl(VXLAN_FLAGS) || (vxh->vx_vni & htonl(0xff))) { pr_warn("invalid vxlan flags=%#x vni=%#x\n", @@ -121,14 +139,32 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto error; } - if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) + if (isnsh) { + struct nshhdr *nsh = nsh_hdr(skb); + + if (unlikely(nsh->b.svc_idx == 0)) { + pr_warn("NSH service index reached zero\n"); + goto drop; + } + + if (unlikely(nsh->b.svc_path & htonl(NSH_M_NSI))) { + pr_warn("invalid NSH service path=%#x\n", + ntohl(nsh->b.svc_path)); + goto drop; + } + + nsp = nsh->b.svc_path | htonl(nsh->b.svc_idx); + } + + if (iptunnel_pull_header(skb, isnsh ? 
NSH_HLEN : VXLAN_HLEN, + htons(ETH_P_TEB))) goto drop; vs = vxlan_find_sock(sock_net(sk), inet_sport(sk)); if (!vs) goto drop; - vs->rcv(vs, skb, vxh->vx_vni); + vs->rcv(vs, skb, vxh->vx_vni, nsp); return 0; drop: @@ -212,8 +248,9 @@ static int handle_offloads(struct sk_buff *skb) int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni) + __be16 src_port, __be16 dst_port, __be32 vni, __be32 nsp) { + bool isnsh = (dst_port == htons(NSH_DST_PORT)); struct vxlanhdr *vxh; struct udphdr *uh; int min_headroom; @@ -222,7 +259,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, skb_reset_inner_headers(skb); min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len - + VXLAN_HLEN + sizeof(struct iphdr) + + (isnsh ? NSH_HLEN : VXLAN_HLEN) + sizeof(struct iphdr) + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); /* Need space for new headers (invalidates iph ptr) */ @@ -239,6 +276,20 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, vlan_set_tci(skb, 0); } + if (isnsh) { + struct nshhdr *nsh; + uint8_t nsi = ntohl(nsp) & NSH_M_NSI; + + nsh = (struct nshhdr *) __skb_push(skb, sizeof(*nsh)); + nsh->b.o = 0; + nsh->b.res = 0; + nsh->b.svc_idx = nsi ? 
nsi : 0x01; + nsh->b.proto = htons(NSH_P_TEB); + nsh->b.svc_path = nsp & htonl(NSH_M_NSP); + nsh->b.c = 0; + memset(&nsh->c, 0x00, sizeof nsh->c); + } + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_vni = vni; diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c index b6c1d6f..139fe17 100644 --- a/datapath/vport-gre.c +++ b/datapath/vport-gre.c @@ -112,7 +112,8 @@ static int gre_rcv(struct sk_buff *skb, return PACKET_REJECT; key = key_to_tunnel_id(tpi->key, tpi->seq); - ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, filter_tnl_flags(tpi->flags)); + ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, 0, + filter_tnl_flags(tpi->flags)); ovs_vport_receive(vport, skb, &tun_key); return PACKET_RCVD; diff --git a/datapath/vport-lisp.c b/datapath/vport-lisp.c index e4e603f..77df084 100644 --- a/datapath/vport-lisp.c +++ b/datapath/vport-lisp.c @@ -232,7 +232,7 @@ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) /* Save outer tunnel values */ iph = ip_hdr(skb); - ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY); + ovs_flow_tun_key_init(&tun_key, iph, key, 0, TUNNEL_KEY); /* Drop non-IP inner packets */ inner_iph = (struct iphdr *)(lisph + 1); diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c index 3401dfd..c5d1b5a 100644 --- a/datapath/vport-vxlan.c +++ b/datapath/vport-vxlan.c @@ -59,7 +59,8 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport) } /* Called with rcu_read_lock and BH disabled. 
*/ -static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) +static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, + __be32 vx_vni, __be32 nsp) { struct ovs_key_ipv4_tunnel tun_key; struct vport *vport = vs->data; @@ -69,7 +70,8 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) /* Save outer tunnel values */ iph = ip_hdr(skb); key = cpu_to_be64(ntohl(vx_vni) >> 8); - ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY); + ovs_flow_tun_key_init(&tun_key, iph, key, nsp, + TUNNEL_KEY | TUNNEL_NSP); ovs_vport_receive(vport, skb, &tun_key); } @@ -181,7 +183,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) OVS_CB(skb)->tun_key->ipv4_tos, OVS_CB(skb)->tun_key->ipv4_ttl, df, src_port, dst_port, - htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8)); + htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8), + OVS_CB(skb)->tun_key->nsp); if (err < 0) ip_rt_put(rt); error: diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index 09c26b5..6239400 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -304,6 +304,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_TTL, /* u8 Tunnel IP TTL. */ OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ + OVS_TUNNEL_KEY_ATTR_NSP, /* be32 NSH service path */ __OVS_TUNNEL_KEY_ATTR_MAX }; -- 1.7.9.5 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev