On Jul 19, 2013, at 3:31 PM, Pravin Shelar <pshe...@nicira.com> wrote: > On Fri, Jul 19, 2013 at 6:28 AM, Kyle Mestery (kmestery) > <kmest...@cisco.com> wrote: >> >> On Jul 18, 2013, at 5:22 PM, Pravin B Shelar <pshe...@nicira.com> wrote: >> >>> Following patch restructures vxlan tunneling so that it is more >>> in sync with upstream vxlan tunneling code. >>> >>> Signed-off-by: Pravin Shelar <pshe...@nicira.com> >>> --- >>> v3-v2: >>> - Moved kernel version in flow_dissector check to top. >>> v1-v2: >>> - Added create flag to vxlan-port add. >>> - Moved rxhash functions to flow_dissector.c >>> --- >>> datapath/compat.h | 6 + >>> datapath/linux/Modules.mk | 6 +- >>> datapath/linux/compat/flow_dissector.c | 203 +++++++++++ >>> datapath/linux/compat/include/linux/in.h | 20 ++ >>> datapath/linux/compat/include/linux/skbuff.h | 22 ++ >>> datapath/linux/compat/include/net/flow_keys.h | 22 ++ >>> datapath/linux/compat/include/net/ip.h | 7 + >>> datapath/linux/compat/include/net/ipv6.h | 15 + >>> datapath/linux/compat/include/net/vxlan.h | 43 +++ >>> datapath/linux/compat/vxlan.c | 457 >>> +++++++++++++++++++++++++ >>> datapath/vport-vxlan.c | 221 +++++-------- >>> 11 files changed, 877 insertions(+), 145 deletions(-) >>> create mode 100644 datapath/linux/compat/flow_dissector.c >>> create mode 100644 datapath/linux/compat/include/net/flow_keys.h >>> create mode 100644 datapath/linux/compat/include/net/vxlan.h >>> create mode 100644 datapath/linux/compat/vxlan.c >>> >>> diff --git a/datapath/compat.h b/datapath/compat.h >>> index a6a01d5..4dfd192 100644 >>> --- a/datapath/compat.h >>> +++ b/datapath/compat.h >>> @@ -100,4 +100,10 @@ static inline void skb_set_mark(struct sk_buff *skb, >>> u32 mark) >>> #define rt_dst(rt) (rt->u.dst) >>> #endif >>> >>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) >>> +#define inet_sport(sk) (inet_sk(sk)->sport) >>> +#else >>> +#define inet_sport(sk) (inet_sk(sk)->inet_sport) >>> +#endif >>> + >>> #endif /* compat.h */ >>> diff --git 
a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk >>> index dcacc79..edaeabb 100644 >>> --- a/datapath/linux/Modules.mk >>> +++ b/datapath/linux/Modules.mk >>> @@ -3,6 +3,7 @@ openvswitch_sources += \ >>> linux/compat/dev-openvswitch.c \ >>> linux/compat/exthdrs_core.c \ >>> linux/compat/flex_array.c \ >>> + linux/compat/flow_dissector.c \ >>> linux/compat/gre.c \ >>> linux/compat/gso.c \ >>> linux/compat/genetlink-openvswitch.c \ >>> @@ -14,6 +15,7 @@ openvswitch_sources += \ >>> linux/compat/reciprocal_div.c \ >>> linux/compat/skbuff-openvswitch.c \ >>> linux/compat/time.c \ >>> + linux/compat/vxlan.c \ >>> linux/compat/workqueue.c >>> openvswitch_headers += \ >>> linux/compat/gso.h \ >>> @@ -65,6 +67,7 @@ openvswitch_headers += \ >>> linux/compat/include/linux/workqueue.h \ >>> linux/compat/include/net/checksum.h \ >>> linux/compat/include/net/dst.h \ >>> + linux/compat/include/net/flow_keys.h \ >>> linux/compat/include/net/genetlink.h \ >>> linux/compat/include/net/gre.h \ >>> linux/compat/include/net/inet_frag.h \ >>> @@ -76,4 +79,5 @@ openvswitch_headers += \ >>> linux/compat/include/net/protocol.h \ >>> linux/compat/include/net/route.h \ >>> linux/compat/include/net/sock.h \ >>> - linux/compat/include/net/netns/generic.h >>> + linux/compat/include/net/netns/generic.h \ >>> + linux/compat/include/net/vxlan.h >>> diff --git a/datapath/linux/compat/flow_dissector.c >>> b/datapath/linux/compat/flow_dissector.c >>> new file mode 100644 >>> index 0000000..c2078d6 >>> --- /dev/null >>> +++ b/datapath/linux/compat/flow_dissector.c >>> @@ -0,0 +1,203 @@ >>> + >>> +#include <linux/version.h> >>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) >>> +#include <linux/ip.h> >>> +#include <linux/ipv6.h> >>> +#include <linux/if_vlan.h> >>> +#include <net/ip.h> >>> +#include <net/ipv6.h> >>> +#include <linux/igmp.h> >>> +#include <linux/icmp.h> >>> +#include <linux/sctp.h> >>> +#include <linux/dccp.h> >>> +#include <linux/if_tunnel.h> >>> +#include <linux/if_pppox.h> 
>>> +#include <linux/ppp_defs.h> >>> +#include <net/flow_keys.h> >>> + >> This file appears to be missing license and copyright information. >> >>> + >>> +/* copy saddr & daddr, possibly using 64bit load/store >>> + * Equivalent to : flow->src = iph->saddr; >>> + * flow->dst = iph->daddr; >>> + */ >>> +static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct >>> iphdr *iph) >>> +{ >>> + BUILD_BUG_ON(offsetof(typeof(*flow), dst) != >>> + offsetof(typeof(*flow), src) + sizeof(flow->src)); >>> + memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + >>> sizeof(flow->dst)); >>> +} >>> + >>> +static bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys >>> *flow) >>> +{ >>> + int poff, nhoff = skb_network_offset(skb); >>> + u8 ip_proto; >>> + __be16 proto = skb->protocol; >>> + >>> + memset(flow, 0, sizeof(*flow)); >>> + >>> +again: >>> + switch (proto) { >>> + case __constant_htons(ETH_P_IP): { >>> + const struct iphdr *iph; >>> + struct iphdr _iph; >>> +ip: >>> + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); >>> + if (!iph) >>> + return false; >>> + >>> + if (ip_is_fragment(iph)) >>> + ip_proto = 0; >>> + else >>> + ip_proto = iph->protocol; >>> + iph_to_flow_copy_addrs(flow, iph); >>> + nhoff += iph->ihl * 4; >>> + break; >>> + } >>> + case __constant_htons(ETH_P_IPV6): { >>> + const struct ipv6hdr *iph; >>> + struct ipv6hdr _iph; >>> +ipv6: >>> + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); >>> + if (!iph) >>> + return false; >>> + >>> + ip_proto = iph->nexthdr; >>> + flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); >>> + flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); >>> + nhoff += sizeof(struct ipv6hdr); >>> + break; >>> + } >>> + case __constant_htons(ETH_P_8021Q): { >>> + const struct vlan_hdr *vlan; >>> + struct vlan_hdr _vlan; >>> + >>> + vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan); >>> + if (!vlan) >>> + return false; >>> + >>> + proto = vlan->h_vlan_encapsulated_proto; >>> 
+ nhoff += sizeof(*vlan); >>> + goto again; >>> + } >>> + case __constant_htons(ETH_P_PPP_SES): { >>> + struct { >>> + struct pppoe_hdr hdr; >>> + __be16 proto; >>> + } *hdr, _hdr; >>> + hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); >>> + if (!hdr) >>> + return false; >>> + proto = hdr->proto; >>> + nhoff += PPPOE_SES_HLEN; >>> + switch (proto) { >>> + case __constant_htons(PPP_IP): >>> + goto ip; >>> + case __constant_htons(PPP_IPV6): >>> + goto ipv6; >>> + default: >>> + return false; >>> + } >>> + } >>> + default: >>> + return false; >>> + } >>> + >>> + switch (ip_proto) { >>> + case IPPROTO_GRE: { >>> + struct gre_hdr { >>> + __be16 flags; >>> + __be16 proto; >>> + } *hdr, _hdr; >>> + >>> + hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); >>> + if (!hdr) >>> + return false; >>> + /* >>> + * Only look inside GRE if version zero and no >>> + * routing >>> + */ >>> + if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) { >>> + proto = hdr->proto; >>> + nhoff += 4; >>> + if (hdr->flags & GRE_CSUM) >>> + nhoff += 4; >>> + if (hdr->flags & GRE_KEY) >>> + nhoff += 4; >>> + if (hdr->flags & GRE_SEQ) >>> + nhoff += 4; >>> + if (proto == htons(ETH_P_TEB)) { >>> + const struct ethhdr *eth; >>> + struct ethhdr _eth; >>> + >>> + eth = skb_header_pointer(skb, nhoff, >>> + sizeof(_eth), &_eth); >>> + if (!eth) >>> + return false; >>> + proto = eth->h_proto; >>> + nhoff += sizeof(*eth); >>> + } >>> + goto again; >>> + } >>> + break; >>> + } >>> + case IPPROTO_IPIP: >>> + goto again; >>> + default: >>> + break; >>> + } >>> + >>> + flow->ip_proto = ip_proto; >>> + poff = proto_ports_offset(ip_proto); >>> + if (poff >= 0) { >>> + __be32 *ports, _ports; >>> + >>> + nhoff += poff; >>> + ports = skb_header_pointer(skb, nhoff, sizeof(_ports), >>> &_ports); >>> + if (ports) >>> + flow->ports = *ports; >>> + } >>> + >>> + flow->thoff = (u16) nhoff; >>> + >>> + return true; >>> +} >>> + >>> +static u32 hashrnd __read_mostly; >>> + >>> +static void init_hashrnd(void) >>> 
+{ >>> + if (likely(hashrnd)) >>> + return; >>> + get_random_bytes(&hashrnd, sizeof(hashrnd)); >>> +} >>> + >>> +u32 __skb_get_rxhash(struct sk_buff *skb) >>> +{ >>> + struct flow_keys keys; >>> + u32 hash; >>> + >>> + if (!skb_flow_dissect(skb, &keys)) >>> + return 0; >>> + >>> + /* get a consistent hash (same value on both flow directions) */ >>> + if (((__force u32)keys.dst < (__force u32)keys.src) || >>> + (((__force u32)keys.dst == (__force u32)keys.src) && >>> + ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { >>> + swap(keys.dst, keys.src); >>> + swap(keys.port16[0], keys.port16[1]); >>> + } >>> + >>> + init_hashrnd(); >>> + >>> + hash = jhash_3words((__force u32)keys.dst, >>> + (__force u32)keys.src, >>> + (__force u32)keys.ports, hashrnd); >>> + if (!hash) >>> + hash = 1; >>> + >>> +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34) >>> + skb->rxhash = hash; >>> +#endif >>> + return hash; >>> +} >>> +#endif >>> diff --git a/datapath/linux/compat/include/linux/in.h >>> b/datapath/linux/compat/include/linux/in.h >>> index f91a832..fa2e026 100644 >>> --- a/datapath/linux/compat/include/linux/in.h >>> +++ b/datapath/linux/compat/include/linux/in.h >>> @@ -3,6 +3,26 @@ >>> >>> #include_next <linux/in.h> >>> >>> +#include <linux/module.h> >>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) >>> +static inline int proto_ports_offset(int proto) >>> +{ >>> + switch (proto) { >>> + case IPPROTO_TCP: >>> + case IPPROTO_UDP: >>> + case IPPROTO_DCCP: >>> + case IPPROTO_ESP: /* SPI */ >>> + case IPPROTO_SCTP: >>> + case IPPROTO_UDPLITE: >>> + return 0; >>> + case IPPROTO_AH: /* SPI */ >>> + return 4; >>> + default: >>> + return -EINVAL; >>> + } >>> +} >>> +#endif >>> + >>> #ifndef HAVE_IPV4_IS_MULTICAST >>> >>> static inline bool ipv4_is_loopback(__be32 addr) >>> diff --git a/datapath/linux/compat/include/linux/skbuff.h >>> b/datapath/linux/compat/include/linux/skbuff.h >>> index d485b39..c9c103d 100644 >>> --- a/datapath/linux/compat/include/linux/skbuff.h 
>>> +++ b/datapath/linux/compat/include/linux/skbuff.h >>> @@ -251,4 +251,26 @@ static inline void skb_reset_mac_len(struct sk_buff >>> *skb) >>> skb->mac_len = skb->network_header - skb->mac_header; >>> } >>> #endif >>> + >>> +static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) >>> +{ >>> + might_sleep_if(pri & __GFP_WAIT); >>> + >>> + if (skb_cloned(skb)) >>> + return pskb_expand_head(skb, 0, 0, pri); >>> + >>> + return 0; >>> +} >>> + >>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) >>> +extern u32 __skb_get_rxhash(struct sk_buff *skb); >>> +static inline __u32 skb_get_rxhash(struct sk_buff *skb) >>> +{ >>> +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34) >>> + if (!skb->rxhash) >>> +#endif >>> + return __skb_get_rxhash(skb); >>> +} >>> +#endif >>> + >>> #endif >>> diff --git a/datapath/linux/compat/include/net/flow_keys.h >>> b/datapath/linux/compat/include/net/flow_keys.h >>> new file mode 100644 >>> index 0000000..4de17d1 >>> --- /dev/null >>> +++ b/datapath/linux/compat/include/net/flow_keys.h >>> @@ -0,0 +1,22 @@ >>> +#ifndef _NET_FLOW_KEYS_WRAPPER_H >>> +#define _NET_FLOW_KEYS_WRAPPER_H >>> + >>> +#include <linux/version.h> >>> + >>> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0) >>> +#include_next <net/flow_keys.h> >>> +#else >>> +struct flow_keys { >>> + /* (src,dst) must be grouped, in the same way than in IP header */ >>> + __be32 src; >>> + __be32 dst; >>> + union { >>> + __be32 ports; >>> + __be16 port16[2]; >>> + }; >>> + u16 thoff; >>> + u8 ip_proto; >>> +}; >>> +#endif >>> + >>> +#endif >>> diff --git a/datapath/linux/compat/include/net/ip.h >>> b/datapath/linux/compat/include/net/ip.h >>> index b18b968..1dccdea 100644 >>> --- a/datapath/linux/compat/include/net/ip.h >>> +++ b/datapath/linux/compat/include/net/ip.h >>> @@ -11,4 +11,11 @@ extern int ip_local_out(struct sk_buff *skb); >>> >>> #endif /* linux kernel < 2.6.25 */ >>> >>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) >>> +static inline bool ip_is_fragment(const struct 
iphdr *iph) >>> +{ >>> + return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; >>> +} >>> +#endif >>> + >>> #endif >>> diff --git a/datapath/linux/compat/include/net/ipv6.h >>> b/datapath/linux/compat/include/net/ipv6.h >>> index d1e3248..7ab234a 100644 >>> --- a/datapath/linux/compat/include/net/ipv6.h >>> +++ b/datapath/linux/compat/include/net/ipv6.h >>> @@ -23,4 +23,19 @@ enum { >>> extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, >>> int target, unsigned short *fragoff, int *fragflg); >>> >>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) >>> +static inline u32 ipv6_addr_hash(const struct in6_addr *a) >>> +{ >>> +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 >>> + const unsigned long *ul = (const unsigned long *)a; >>> + unsigned long x = ul[0] ^ ul[1]; >>> + >>> + return (u32)(x ^ (x >> 32)); >>> +#else >>> + return (__force u32)(a->s6_addr32[0] ^ a->s6_addr32[1] ^ >>> + a->s6_addr32[2] ^ a->s6_addr32[3]); >>> +#endif >>> +} >>> +#endif >>> + >>> #endif >>> diff --git a/datapath/linux/compat/include/net/vxlan.h >>> b/datapath/linux/compat/include/net/vxlan.h >>> new file mode 100644 >>> index 0000000..102bc0c >>> --- /dev/null >>> +++ b/datapath/linux/compat/include/net/vxlan.h >>> @@ -0,0 +1,43 @@ >>> +#ifndef __NET_VXLAN_WRAPPER_H >>> +#define __NET_VXLAN_WRAPPER_H 1 >>> + >>> +#include <linux/skbuff.h> >>> +#include <linux/netdevice.h> >>> +#include <linux/udp.h> >>> + >>> +/* per UDP socket information */ >>> +struct vxlan_sock { >>> + struct hlist_node hlist; >>> + struct rcu_head rcu; >>> + struct socket *sock; >>> + struct list_head handler_list; >>> +}; >>> + >>> +struct vxlan_handler; >>> +typedef int (vxlan_rcv_t)(struct vxlan_handler *vh, struct sk_buff *skb, >>> __be32 key); >>> + >>> +struct vxlan_handler { >>> + vxlan_rcv_t *rcv; >>> + struct list_head node; >>> + void *data; >>> + struct vxlan_sock *vs; >>> + atomic_t refcnt; >>> + struct rcu_head rcu; >>> + struct work_struct del_work; 
>>> + int priority; >>> +}; >>> + >>> +void vxlan_handler_put(struct vxlan_handler *vh); >>> + >>> +struct vxlan_handler *vxlan_handler_add(struct net *net, >>> + __be16 portno, vxlan_rcv_t *rcv, >>> + void *data, int priority, bool >>> create); >>> + >>> +int vxlan_xmit_skb(struct net *net, struct vxlan_handler *vh, >>> + struct rtable *rt, struct sk_buff *skb, >>> + __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, >>> + __be16 src_port, __be16 dst_port, __be32 vni); >>> + >>> +__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb); >>> + >>> +#endif >>> diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c >>> new file mode 100644 >>> index 0000000..b41ecc2 >>> --- /dev/null >>> +++ b/datapath/linux/compat/vxlan.c >>> @@ -0,0 +1,457 @@ >>> +#include <linux/kernel.h> >>> +#include <linux/types.h> >>> +#include <linux/module.h> >>> +#include <linux/errno.h> >>> +#include <linux/slab.h> >>> +#include <linux/skbuff.h> >>> +#include <linux/rculist.h> >>> +#include <linux/netdevice.h> >>> +#include <linux/in.h> >>> +#include <linux/ip.h> >>> +#include <linux/udp.h> >>> +#include <linux/igmp.h> >>> +#include <linux/etherdevice.h> >>> +#include <linux/if_ether.h> >>> +#include <linux/if_vlan.h> >>> +#include <linux/hash.h> >>> +#include <linux/ethtool.h> >>> +#include <net/arp.h> >>> +#include <net/ndisc.h> >>> +#include <net/ip.h> >>> +#include <net/ip_tunnels.h> >>> +#include <net/icmp.h> >>> +#include <net/udp.h> >>> +#include <net/rtnetlink.h> >>> +#include <net/route.h> >>> +#include <net/dsfield.h> >>> +#include <net/inet_ecn.h> >>> +#include <net/net_namespace.h> >>> +#include <net/netns/generic.h> >>> +#include <net/vxlan.h> >>> + >> Same thing here, no license or copyright. >> > > Most of files in compat directory does not have copyright notice, > anyways I will add it.
I wasn't sure why that was, actually; I just thought I'd point it out. Thanks for adding it, though, Pravin! Kyle _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev