On Fri, Jul 19, 2013 at 6:28 AM, Kyle Mestery (kmestery) <kmest...@cisco.com> wrote: > > On Jul 18, 2013, at 5:22 PM, Pravin B Shelar <pshe...@nicira.com> wrote: > >> Following patch restructures vxlan tunneling so that it is more >> in sync with upstream vxlan tunneling code. >> >> Signed-off-by: Pravin Shelar <pshe...@nicira.com> >> --- >> v3-v2: >> - Moved kernel version in flow_dissector check to top. >> v1-v2: >> - Added create flag to vxlan-port add. >> - Moved rxhash functions to flow_dissector.c >> --- >> datapath/compat.h | 6 + >> datapath/linux/Modules.mk | 6 +- >> datapath/linux/compat/flow_dissector.c | 203 +++++++++++ >> datapath/linux/compat/include/linux/in.h | 20 ++ >> datapath/linux/compat/include/linux/skbuff.h | 22 ++ >> datapath/linux/compat/include/net/flow_keys.h | 22 ++ >> datapath/linux/compat/include/net/ip.h | 7 + >> datapath/linux/compat/include/net/ipv6.h | 15 + >> datapath/linux/compat/include/net/vxlan.h | 43 +++ >> datapath/linux/compat/vxlan.c | 457 >> +++++++++++++++++++++++++ >> datapath/vport-vxlan.c | 221 +++++-------- >> 11 files changed, 877 insertions(+), 145 deletions(-) >> create mode 100644 datapath/linux/compat/flow_dissector.c >> create mode 100644 datapath/linux/compat/include/net/flow_keys.h >> create mode 100644 datapath/linux/compat/include/net/vxlan.h >> create mode 100644 datapath/linux/compat/vxlan.c >> >> diff --git a/datapath/compat.h b/datapath/compat.h >> index a6a01d5..4dfd192 100644 >> --- a/datapath/compat.h >> +++ b/datapath/compat.h >> @@ -100,4 +100,10 @@ static inline void skb_set_mark(struct sk_buff *skb, >> u32 mark) >> #define rt_dst(rt) (rt->u.dst) >> #endif >> >> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) >> +#define inet_sport(sk) (inet_sk(sk)->sport) >> +#else >> +#define inet_sport(sk) (inet_sk(sk)->inet_sport) >> +#endif >> + >> #endif /* compat.h */ >> diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk >> index dcacc79..edaeabb 100644 >> --- a/datapath/linux/Modules.mk >> 
+++ b/datapath/linux/Modules.mk >> @@ -3,6 +3,7 @@ openvswitch_sources += \ >> linux/compat/dev-openvswitch.c \ >> linux/compat/exthdrs_core.c \ >> linux/compat/flex_array.c \ >> + linux/compat/flow_dissector.c \ >> linux/compat/gre.c \ >> linux/compat/gso.c \ >> linux/compat/genetlink-openvswitch.c \ >> @@ -14,6 +15,7 @@ openvswitch_sources += \ >> linux/compat/reciprocal_div.c \ >> linux/compat/skbuff-openvswitch.c \ >> linux/compat/time.c \ >> + linux/compat/vxlan.c \ >> linux/compat/workqueue.c >> openvswitch_headers += \ >> linux/compat/gso.h \ >> @@ -65,6 +67,7 @@ openvswitch_headers += \ >> linux/compat/include/linux/workqueue.h \ >> linux/compat/include/net/checksum.h \ >> linux/compat/include/net/dst.h \ >> + linux/compat/include/net/flow_keys.h \ >> linux/compat/include/net/genetlink.h \ >> linux/compat/include/net/gre.h \ >> linux/compat/include/net/inet_frag.h \ >> @@ -76,4 +79,5 @@ openvswitch_headers += \ >> linux/compat/include/net/protocol.h \ >> linux/compat/include/net/route.h \ >> linux/compat/include/net/sock.h \ >> - linux/compat/include/net/netns/generic.h >> + linux/compat/include/net/netns/generic.h \ >> + linux/compat/include/net/vxlan.h >> diff --git a/datapath/linux/compat/flow_dissector.c >> b/datapath/linux/compat/flow_dissector.c >> new file mode 100644 >> index 0000000..c2078d6 >> --- /dev/null >> +++ b/datapath/linux/compat/flow_dissector.c >> @@ -0,0 +1,203 @@ >> + >> +#include <linux/version.h> >> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) >> +#include <linux/ip.h> >> +#include <linux/ipv6.h> >> +#include <linux/if_vlan.h> >> +#include <net/ip.h> >> +#include <net/ipv6.h> >> +#include <linux/igmp.h> >> +#include <linux/icmp.h> >> +#include <linux/sctp.h> >> +#include <linux/dccp.h> >> +#include <linux/if_tunnel.h> >> +#include <linux/if_pppox.h> >> +#include <linux/ppp_defs.h> >> +#include <net/flow_keys.h> >> + > This file appears to be missing license and copyright information. 
> >> + >> +/* copy saddr & daddr, possibly using 64bit load/store >> + * Equivalent to : flow->src = iph->saddr; >> + * flow->dst = iph->daddr; >> + */ >> +static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct >> iphdr *iph) >> +{ >> + BUILD_BUG_ON(offsetof(typeof(*flow), dst) != >> + offsetof(typeof(*flow), src) + sizeof(flow->src)); >> + memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst)); >> +} >> + >> +static bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys >> *flow) >> +{ >> + int poff, nhoff = skb_network_offset(skb); >> + u8 ip_proto; >> + __be16 proto = skb->protocol; >> + >> + memset(flow, 0, sizeof(*flow)); >> + >> +again: >> + switch (proto) { >> + case __constant_htons(ETH_P_IP): { >> + const struct iphdr *iph; >> + struct iphdr _iph; >> +ip: >> + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); >> + if (!iph) >> + return false; >> + >> + if (ip_is_fragment(iph)) >> + ip_proto = 0; >> + else >> + ip_proto = iph->protocol; >> + iph_to_flow_copy_addrs(flow, iph); >> + nhoff += iph->ihl * 4; >> + break; >> + } >> + case __constant_htons(ETH_P_IPV6): { >> + const struct ipv6hdr *iph; >> + struct ipv6hdr _iph; >> +ipv6: >> + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); >> + if (!iph) >> + return false; >> + >> + ip_proto = iph->nexthdr; >> + flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); >> + flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); >> + nhoff += sizeof(struct ipv6hdr); >> + break; >> + } >> + case __constant_htons(ETH_P_8021Q): { >> + const struct vlan_hdr *vlan; >> + struct vlan_hdr _vlan; >> + >> + vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan); >> + if (!vlan) >> + return false; >> + >> + proto = vlan->h_vlan_encapsulated_proto; >> + nhoff += sizeof(*vlan); >> + goto again; >> + } >> + case __constant_htons(ETH_P_PPP_SES): { >> + struct { >> + struct pppoe_hdr hdr; >> + __be16 proto; >> + } *hdr, _hdr; >> + hdr = skb_header_pointer(skb, 
nhoff, sizeof(_hdr), &_hdr); >> + if (!hdr) >> + return false; >> + proto = hdr->proto; >> + nhoff += PPPOE_SES_HLEN; >> + switch (proto) { >> + case __constant_htons(PPP_IP): >> + goto ip; >> + case __constant_htons(PPP_IPV6): >> + goto ipv6; >> + default: >> + return false; >> + } >> + } >> + default: >> + return false; >> + } >> + >> + switch (ip_proto) { >> + case IPPROTO_GRE: { >> + struct gre_hdr { >> + __be16 flags; >> + __be16 proto; >> + } *hdr, _hdr; >> + >> + hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); >> + if (!hdr) >> + return false; >> + /* >> + * Only look inside GRE if version zero and no >> + * routing >> + */ >> + if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) { >> + proto = hdr->proto; >> + nhoff += 4; >> + if (hdr->flags & GRE_CSUM) >> + nhoff += 4; >> + if (hdr->flags & GRE_KEY) >> + nhoff += 4; >> + if (hdr->flags & GRE_SEQ) >> + nhoff += 4; >> + if (proto == htons(ETH_P_TEB)) { >> + const struct ethhdr *eth; >> + struct ethhdr _eth; >> + >> + eth = skb_header_pointer(skb, nhoff, >> + sizeof(_eth), &_eth); >> + if (!eth) >> + return false; >> + proto = eth->h_proto; >> + nhoff += sizeof(*eth); >> + } >> + goto again; >> + } >> + break; >> + } >> + case IPPROTO_IPIP: >> + goto again; >> + default: >> + break; >> + } >> + >> + flow->ip_proto = ip_proto; >> + poff = proto_ports_offset(ip_proto); >> + if (poff >= 0) { >> + __be32 *ports, _ports; >> + >> + nhoff += poff; >> + ports = skb_header_pointer(skb, nhoff, sizeof(_ports), >> &_ports); >> + if (ports) >> + flow->ports = *ports; >> + } >> + >> + flow->thoff = (u16) nhoff; >> + >> + return true; >> +} >> + >> +static u32 hashrnd __read_mostly; >> + >> +static void init_hashrnd(void) >> +{ >> + if (likely(hashrnd)) >> + return; >> + get_random_bytes(&hashrnd, sizeof(hashrnd)); >> +} >> + >> +u32 __skb_get_rxhash(struct sk_buff *skb) >> +{ >> + struct flow_keys keys; >> + u32 hash; >> + >> + if (!skb_flow_dissect(skb, &keys)) >> + return 0; >> + >> + /* get a consistent hash (same 
value on both flow directions) */ >> + if (((__force u32)keys.dst < (__force u32)keys.src) || >> + (((__force u32)keys.dst == (__force u32)keys.src) && >> + ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { >> + swap(keys.dst, keys.src); >> + swap(keys.port16[0], keys.port16[1]); >> + } >> + >> + init_hashrnd(); >> + >> + hash = jhash_3words((__force u32)keys.dst, >> + (__force u32)keys.src, >> + (__force u32)keys.ports, hashrnd); >> + if (!hash) >> + hash = 1; >> + >> +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34) >> + skb->rxhash = hash; >> +#endif >> + return hash; >> +} >> +#endif >> diff --git a/datapath/linux/compat/include/linux/in.h >> b/datapath/linux/compat/include/linux/in.h >> index f91a832..fa2e026 100644 >> --- a/datapath/linux/compat/include/linux/in.h >> +++ b/datapath/linux/compat/include/linux/in.h >> @@ -3,6 +3,26 @@ >> >> #include_next <linux/in.h> >> >> +#include <linux/module.h> >> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) >> +static inline int proto_ports_offset(int proto) >> +{ >> + switch (proto) { >> + case IPPROTO_TCP: >> + case IPPROTO_UDP: >> + case IPPROTO_DCCP: >> + case IPPROTO_ESP: /* SPI */ >> + case IPPROTO_SCTP: >> + case IPPROTO_UDPLITE: >> + return 0; >> + case IPPROTO_AH: /* SPI */ >> + return 4; >> + default: >> + return -EINVAL; >> + } >> +} >> +#endif >> + >> #ifndef HAVE_IPV4_IS_MULTICAST >> >> static inline bool ipv4_is_loopback(__be32 addr) >> diff --git a/datapath/linux/compat/include/linux/skbuff.h >> b/datapath/linux/compat/include/linux/skbuff.h >> index d485b39..c9c103d 100644 >> --- a/datapath/linux/compat/include/linux/skbuff.h >> +++ b/datapath/linux/compat/include/linux/skbuff.h >> @@ -251,4 +251,26 @@ static inline void skb_reset_mac_len(struct sk_buff >> *skb) >> skb->mac_len = skb->network_header - skb->mac_header; >> } >> #endif >> + >> +static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) >> +{ >> + might_sleep_if(pri & __GFP_WAIT); >> + >> + if (skb_cloned(skb)) >> + return 
pskb_expand_head(skb, 0, 0, pri); >> + >> + return 0; >> +} >> + >> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) >> +extern u32 __skb_get_rxhash(struct sk_buff *skb); >> +static inline __u32 skb_get_rxhash(struct sk_buff *skb) >> +{ >> +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34) >> + if (!skb->rxhash) >> +#endif >> + return __skb_get_rxhash(skb); >> +} >> +#endif >> + >> #endif >> diff --git a/datapath/linux/compat/include/net/flow_keys.h >> b/datapath/linux/compat/include/net/flow_keys.h >> new file mode 100644 >> index 0000000..4de17d1 >> --- /dev/null >> +++ b/datapath/linux/compat/include/net/flow_keys.h >> @@ -0,0 +1,22 @@ >> +#ifndef _NET_FLOW_KEYS_WRAPPER_H >> +#define _NET_FLOW_KEYS_WRAPPER_H >> + >> +#include <linux/version.h> >> + >> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0) >> +#include_next <net/flow_keys.h> >> +#else >> +struct flow_keys { >> + /* (src,dst) must be grouped, in the same way than in IP header */ >> + __be32 src; >> + __be32 dst; >> + union { >> + __be32 ports; >> + __be16 port16[2]; >> + }; >> + u16 thoff; >> + u8 ip_proto; >> +}; >> +#endif >> + >> +#endif >> diff --git a/datapath/linux/compat/include/net/ip.h >> b/datapath/linux/compat/include/net/ip.h >> index b18b968..1dccdea 100644 >> --- a/datapath/linux/compat/include/net/ip.h >> +++ b/datapath/linux/compat/include/net/ip.h >> @@ -11,4 +11,11 @@ extern int ip_local_out(struct sk_buff *skb); >> >> #endif /* linux kernel < 2.6.25 */ >> >> +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) >> +static inline bool ip_is_fragment(const struct iphdr *iph) >> +{ >> + return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; >> +} >> +#endif >> + >> #endif >> diff --git a/datapath/linux/compat/include/net/ipv6.h >> b/datapath/linux/compat/include/net/ipv6.h >> index d1e3248..7ab234a 100644 >> --- a/datapath/linux/compat/include/net/ipv6.h >> +++ b/datapath/linux/compat/include/net/ipv6.h >> @@ -23,4 +23,19 @@ enum { >> extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned 
int *offset, >> int target, unsigned short *fragoff, int *fragflg); >> >> +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) >> +static inline u32 ipv6_addr_hash(const struct in6_addr *a) >> +{ >> +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 >> + const unsigned long *ul = (const unsigned long *)a; >> + unsigned long x = ul[0] ^ ul[1]; >> + >> + return (u32)(x ^ (x >> 32)); >> +#else >> + return (__force u32)(a->s6_addr32[0] ^ a->s6_addr32[1] ^ >> + a->s6_addr32[2] ^ a->s6_addr32[3]); >> +#endif >> +} >> +#endif >> + >> #endif >> diff --git a/datapath/linux/compat/include/net/vxlan.h >> b/datapath/linux/compat/include/net/vxlan.h >> new file mode 100644 >> index 0000000..102bc0c >> --- /dev/null >> +++ b/datapath/linux/compat/include/net/vxlan.h >> @@ -0,0 +1,43 @@ >> +#ifndef __NET_VXLAN_WRAPPER_H >> +#define __NET_VXLAN_WRAPPER_H 1 >> + >> +#include <linux/skbuff.h> >> +#include <linux/netdevice.h> >> +#include <linux/udp.h> >> + >> +/* per UDP socket information */ >> +struct vxlan_sock { >> + struct hlist_node hlist; >> + struct rcu_head rcu; >> + struct socket *sock; >> + struct list_head handler_list; >> +}; >> + >> +struct vxlan_handler; >> +typedef int (vxlan_rcv_t)(struct vxlan_handler *vh, struct sk_buff *skb, >> __be32 key); >> + >> +struct vxlan_handler { >> + vxlan_rcv_t *rcv; >> + struct list_head node; >> + void *data; >> + struct vxlan_sock *vs; >> + atomic_t refcnt; >> + struct rcu_head rcu; >> + struct work_struct del_work; >> + int priority; >> +}; >> + >> +void vxlan_handler_put(struct vxlan_handler *vh); >> + >> +struct vxlan_handler *vxlan_handler_add(struct net *net, >> + __be16 portno, vxlan_rcv_t *rcv, >> + void *data, int priority, bool create); >> + >> +int vxlan_xmit_skb(struct net *net, struct vxlan_handler *vh, >> + struct rtable *rt, struct sk_buff *skb, >> + __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, >> + __be16 src_port, __be16 dst_port, __be32 vni); >> + >> +__be16 vxlan_src_port(__u16 
port_min, __u16 port_max, struct sk_buff *skb); >> + >> +#endif >> diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c >> new file mode 100644 >> index 0000000..b41ecc2 >> --- /dev/null >> +++ b/datapath/linux/compat/vxlan.c >> @@ -0,0 +1,457 @@ >> +#include <linux/kernel.h> >> +#include <linux/types.h> >> +#include <linux/module.h> >> +#include <linux/errno.h> >> +#include <linux/slab.h> >> +#include <linux/skbuff.h> >> +#include <linux/rculist.h> >> +#include <linux/netdevice.h> >> +#include <linux/in.h> >> +#include <linux/ip.h> >> +#include <linux/udp.h> >> +#include <linux/igmp.h> >> +#include <linux/etherdevice.h> >> +#include <linux/if_ether.h> >> +#include <linux/if_vlan.h> >> +#include <linux/hash.h> >> +#include <linux/ethtool.h> >> +#include <net/arp.h> >> +#include <net/ndisc.h> >> +#include <net/ip.h> >> +#include <net/ip_tunnels.h> >> +#include <net/icmp.h> >> +#include <net/udp.h> >> +#include <net/rtnetlink.h> >> +#include <net/route.h> >> +#include <net/dsfield.h> >> +#include <net/inet_ecn.h> >> +#include <net/net_namespace.h> >> +#include <net/netns/generic.h> >> +#include <net/vxlan.h> >> + > Same thing here, no license or copyright. >
Most of the files in the compat directory do not have a copyright notice; anyway, I will add it. _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev