btw, first and foremost, thanks for the review, i greatly appreciate it :) >> diff --git a/datapath/flow.c b/datapath/flow.c >> index 29122af..4f47a48 100644 >> --- a/datapath/flow.c >> +++ b/datapath/flow.c >> @@ -1235,6 +1235,7 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, >> int rem; >> bool ttl = false; >> __be16 tun_flags = 0; >> + __be32 nsp = 0; >> >> nla_for_each_nested(a, attr, rem) { >> int type = nla_type(a); >> @@ -1246,6 +1247,7 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, >> [OVS_TUNNEL_KEY_ATTR_TTL] = 1, >> [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, >> [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, >> + [OVS_TUNNEL_KEY_ATTR_NSP] = sizeof(u32), >> }; >> >> if (type > OVS_TUNNEL_KEY_ATTR_MAX) { >> @@ -1290,11 +1292,16 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr >> *attr, >> case OVS_TUNNEL_KEY_ATTR_CSUM: >> tun_flags |= TUNNEL_CSUM; >> break; >> + case OVS_TUNNEL_KEY_ATTR_NSP: >> + nsp |= htonl(be32_to_cpu(nla_get_be32(a)) << 8); > > Is the "|=" operator intentional here? If there were two of these attributes > present, > is the intention to OR them together?
The intention is to store 24 bits of nsi and 8 bits of nsp in same 32 bit nsp field, as shown in patch 4, line 50. I will fix this so that individual patches are self contained. Also i will try to document it so that the intention is clear. > >> + tun_flags |= TUNNEL_NSP; >> + break; >> default: >> return -EINVAL; >> } >> } >> > ... >> diff --git a/datapath/flow.h b/datapath/flow.h >> index 03eae03..b316e0a 100644 >> --- a/datapath/flow.h >> +++ b/datapath/flow.h >> @@ -51,6 +51,7 @@ struct sw_flow_actions { >> >> struct ovs_key_ipv4_tunnel { >> __be64 tun_id; >> + __be32 nsp; > > I'd like a comment here mentioning the storage if nsi in place of the reserved > bits. actually nsi and nsp are both stored in same field (nsi take the place where reserved field is), i will document this. > >> __be32 ipv4_src; >> __be32 ipv4_dst; >> __be16 tun_flags; >> @@ -60,9 +61,10 @@ struct ovs_key_ipv4_tunnel { >> >> > ... >> --- /dev/null >> +++ b/datapath/linux/compat/include/net/nsh.h >> @@ -0,0 +1,86 @@ >> +/* >> + * Copyright (c) 2013 Cisco Systems, Inc. >> + * >> + * This program is free software; you can redistribute it and/or >> + * modify it under the terms of version 2 of the GNU General Public >> + * License as published by the Free Software Foundation. >> + * >> + * This program is distributed in the hope that it will be useful, but >> + * WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + * General Public License for more details. >> + * >> + * You should have received a copy of the GNU General Public License >> + * along with this program; if not, write to the Free Software >> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA >> + * 02110-1301, USA >> + */ >> + >> +#ifndef NSH_H >> +#define NSH_H 1 >> + >> +#include <linux/types.h> >> +#include <asm/byteorder.h> >> + >> + >> +/** >> + * struct nsh_bhdr - Network Service Base Header. >> + * @o: Operations and Management Packet indicator bit >> + * @c: If this bit is set then one or more contexts are in use. >> + * @proto: IEEE Ethertypes to indicate the frame within. >> + * @svc_idx: TTL functionality and location within service path. >> + * @svc_path: To uniquely identify service path. >> + */ >> +struct nsh_base { >> +#if defined(__LITTLE_ENDIAN_BITFIELD) >> + __u8 res:6, >> + c:1, >> + o:1; >> +#elif defined(__BIG_ENDIAN_BITFIELD) >> + __u8 o:1, >> + c:1, >> + res:6; >> +#else >> +#error "Bitfield Endianess not defined." >> +#endif >> + __be16 proto; > > I know this is not your call, but I find it weird that they chose to place > the proto field literally in the middle of a 32-bit field. my exact thoughts when i saw the header first time and tried asking to change this, kinda weird, anyway i was told that it remains same. > Taking this set > of fields it would be trivial to come up with a more logical order of fields. very true i agree :) > >> + __u8 svc_idx; >> + __be32 svc_path; > > Since the path is only the highest 24 bits, this would benefit from a comment. yep will add comments here. > >> +}__attribute__((packed)); >> + >> +/** >> + * struct nsh_ctx - Keeps track of NSH context data >> + * @npc: NSH network platform context >> + * @nsc: NSH network shared context >> + * @spc: NSH service platform context >> + * @ssc: NSH service shared context >> + */ >> +struct nsh_ctx { >> + __be32 npc; >> + __be32 nsc; >> + __be32 spc; >> + __be32 ssc; >> +}; >> + >> +/** >> + * struct nshdr - Network Service header >> + * @nsh_base: Network Service Base Header. >> + * @nsh_ctx: Network Service Context Header. >> + */ >> +struct nshhdr { >> + struct nsh_base b; >> + struct nsh_ctx c; >> +}; >> + >> + >> +/* NSH Header Length */ >> +#define NSH_HLEN (sizeof(struct udphdr) + \ >> + sizeof(struct vxlanhdr) + \ >> + sizeof(struct nshhdr)) > > Is this named correctly and in the right place? I guess this header should > not be vxlan dependent. actually i named it after vxlan_hlen in vxlan.c:63, yep they both are named incorrectly. i will change it and move it out. > >> +#define NSH_DST_PORT 9030 /* UDP Port for NSH on VXLAN */ >> +#define NSH_P_TEB 0x6558 /* Transparent Ethernet Bridging */ >> +#define NSH_M_NSP 0xFFFFFF00 >> +#define NSH_M_NSI 0x000000FF > > The masks are a bit confusing, as reading this header alone does not tell > that they are actually about the way nsp and nsi are stored within the tunnel > key. > I think these should be in flow.h right after the tunnel key struct rather > than > here. true, will do. > >> + >> + >> +#endif /* nsh.h */ >> diff --git a/datapath/linux/compat/include/net/vxlan.h >> b/datapath/linux/compat/include/net/vxlan.h >> index 3ac816b..1c15dfb 100644 >> --- a/datapath/linux/compat/include/net/vxlan.h >> +++ b/datapath/linux/compat/include/net/vxlan.h >> @@ -4,9 +4,11 @@ >> #include <linux/skbuff.h> >> #include <linux/netdevice.h> >> #include <linux/udp.h> >> +#include <net/nsh.h> >> >> struct vxlan_sock; >> -typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, >> __be32 key); >> +typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, >> + __be32 key, __be32 nsp); >> >> /* per UDP socket information */ >> struct vxlan_sock { >> @@ -27,7 +29,7 @@ void vxlan_sock_release(struct vxlan_sock *vs); >> int vxlan_xmit_skb(struct vxlan_sock *vs, >> struct rtable *rt, struct sk_buff *skb, >> __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, >> - __be16 src_port, __be16 dst_port, __be32 vni); >> + __be16 src_port, __be16 dst_port, __be32 vni, __be32 nsp); > > I'm not exactly sure on the policy about the compat files, but I think they > should be about compatibility with older kernels. Hence I'm not sure if > this is the right place to introduce new functionality. Could someone > comment on this? yep i was not sure of it as well. > > Also, I'd like to see NSH implemented in a bit more general fashion, so > that it could be also used on top of GRE, like the examples in the RFC show. Actually I was already working on that for last few days, should have this in more general way. mainly the essential logic shown here remains same but would be in more generic implementation. > >> >> __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb); >> >> diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c >> index 4f7671b..8a6d864 100644 >> --- a/datapath/linux/compat/vxlan.c >> +++ b/datapath/linux/compat/vxlan.c >> @@ -50,6 +50,7 @@ >> #include <net/net_namespace.h> >> #include <net/netns/generic.h> >> #include <net/vxlan.h> >> +#include <net/nsh.h> >> >> #include "compat.h" >> #include "gso.h" >> @@ -89,6 +90,16 @@ static inline struct hlist_head *vs_head(struct net *net, >> __be16 port) >> return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; >> } >> >> +static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb) >> +{ >> + return (struct vxlanhdr *)(udp_hdr(skb) + 1); >> +} >> + >> +static inline struct nshhdr *nsh_hdr(const struct sk_buff *skb) >> +{ >> + return (struct nshhdr *)(vxlan_hdr(skb) + 1); >> +} >> + >> /* Find VXLAN socket based on network namespace and UDP port */ >> >> static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port) >> @@ -107,13 +118,20 @@ static int vxlan_udp_encap_recv(struct sock *sk, >> struct sk_buff *skb) >> { >> struct vxlan_sock *vs; >> struct vxlanhdr *vxh; >> + struct udphdr *udp; >> + bool isnsh = false; >> + __be32 nsp = 0; >> + >> + udp = (struct udphdr *)udp_hdr(skb); >> + if (udp->dest == htons(NSH_DST_PORT)) >> + isnsh = true; >> >> /* Need Vxlan and inner Ethernet header to be present */ >> - if (!pskb_may_pull(skb, VXLAN_HLEN)) >> + if (!pskb_may_pull(skb, isnsh ? NSH_HLEN : VXLAN_HLEN)) >> goto error; >> >> /* Return packets with reserved bits set */ >> - vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); >> + vxh = vxlan_hdr(skb); >> if (vxh->vx_flags != htonl(VXLAN_FLAGS) || >> (vxh->vx_vni & htonl(0xff))) { >> pr_warn("invalid vxlan flags=%#x vni=%#x\n", >> @@ -121,14 +139,32 @@ static int vxlan_udp_encap_recv(struct sock *sk, >> struct sk_buff *skb) >> goto error; >> } >> >> - if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) >> + if (isnsh) { >> + struct nshhdr *nsh = nsh_hdr(skb); >> + >> + if (unlikely(nsh->b.svc_idx == 0)) { >> + pr_warn("NSH service index reached zero\n"); >> + goto drop; >> + } >> + >> + if (unlikely(nsh->b.svc_path & htonl(NSH_M_NSI))) { > > I guess you should use ~NSH_M_NSP here instead? In the end it is the same, > but it would be less confusing, as the b.svc_path field does not contain an > NSI. yep, will do. > Also, the usual treatment of reserved bits is ignore on reception, rather than > dropping. hmm. i didn't quite get what you are saying here, vxlan_vni and svc_path both use upper 24bits and their detection seems same to me here: vxlan.c:118: if (vxh->vx_flags != htonl(VXLAN_FLAGS) || (vxh->vx_vni & htonl(0xff))) { or if (unlikely(nsh->b.svc_path & htonl(NSH_M_NSI))) { // NSH_M_NSI ==> 0x000000ff which is same as 0xff above? am i missing something here? > >> + pr_warn("invalid NSH service path=%#x\n", >> + ntohl(nsh->b.svc_path)); >> + goto drop; >> + } >> + >> + nsp = nsh->b.svc_path | htonl(nsh->b.svc_idx); >> + } >> + > ... >> >> --- a/include/linux/openvswitch.h >> +++ b/include/linux/openvswitch.h >> @@ -304,6 +304,7 @@ enum ovs_tunnel_key_attr { >> OVS_TUNNEL_KEY_ATTR_TTL, /* u8 Tunnel IP TTL. */ >> OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ >> OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ >> + OVS_TUNNEL_KEY_ATTR_NSP, /* be32 NSH service path */ > > Should state here that only lowest 24 bits are used. yep will mention it, its actually upper 24 bits. > >> __OVS_TUNNEL_KEY_ATTR_MAX >> }; >> >> -- >> 1.7.9.5 >> >> _______________________________________________ >> dev mailing list >> dev@openvswitch.org >> http://openvswitch.org/mailman/listinfo/dev > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev