The following patch improves the rxhash calculation; it is taken from upstream Linux kernel code. From kernel 3.8 onward, skb_get_rxhash() can handle a hardware-generated l4-rxhash, so the compat skb_get_rxhash() is not used on kernel 3.8 or newer.
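To illustrate the once-only seeding pattern that this backport provides through net_get_random_once(), here is a minimal userspace sketch (illustrative only, not part of the patch): pthread_once() and the trivial mixing step stand in for the kernel's static_key machinery and jhash_3words(); only the structure is meant to match.

/*
 * Userspace sketch of the "initialize the hash seed exactly once"
 * pattern that net_get_random_once() provides in the kernel.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static uint32_t hashrnd;
static pthread_once_t hashrnd_once = PTHREAD_ONCE_INIT;

static void hashrnd_init(void)
{
	/* The kernel uses get_random_bytes(); rand() is only a stand-in. */
	srand((unsigned)time(NULL));
	hashrnd = (uint32_t)rand();
}

static uint32_t flow_hash_3words(uint32_t a, uint32_t b, uint32_t c)
{
	pthread_once(&hashrnd_once, hashrnd_init);	/* seed exactly once */
	/* Placeholder mix; the real code calls jhash_3words(a, b, c, hashrnd). */
	return (a ^ b ^ c) * 0x9e3779b1u + hashrnd;
}

int main(void)
{
	uint32_t hash = flow_hash_3words(0x0a000001, 0x0a000002, 0x1f90);

	/* __skb_get_rxhash() never returns 0, so 0 is remapped to 1. */
	if (!hash)
		hash = 1;
	printf("rxhash = %#x\n", (unsigned)hash);
	return 0;
}

Build it with cc -pthread.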
Signed-off-by: Pravin B Shelar <pshe...@nicira.com>
---
 datapath/linux/Modules.mk                        |  1 +
 datapath/linux/compat/flow_dissector.c           | 54 +++++++++------
 datapath/linux/compat/include/linux/jump_label.h | 79 ++++++++++++++++++++++
 datapath/linux/compat/include/linux/net.h        | 27 ++++++++
 datapath/linux/compat/include/linux/skbuff.h     |  8 ++-
 datapath/linux/compat/utils.c                    | 48 +++++++++++++
 6 files changed, 194 insertions(+), 23 deletions(-)
 create mode 100644 datapath/linux/compat/include/linux/jump_label.h

diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index fee132e..aefe9f9 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -33,6 +33,7 @@ openvswitch_headers += \
 	linux/compat/include/linux/ip.h \
 	linux/compat/include/linux/ipv6.h \
 	linux/compat/include/linux/jiffies.h \
+	linux/compat/include/linux/jump_label.h \
 	linux/compat/include/linux/kconfig.h \
 	linux/compat/include/linux/kernel.h \
 	linux/compat/include/linux/list.h \
diff --git a/datapath/linux/compat/flow_dissector.c b/datapath/linux/compat/flow_dissector.c
index 8592ca9..f176f9a 100644
--- a/datapath/linux/compat/flow_dissector.c
+++ b/datapath/linux/compat/flow_dissector.c
@@ -19,7 +19,7 @@
  */

 #include <linux/version.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/if_vlan.h>
@@ -46,9 +46,25 @@ static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *i
 	memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
 }

+__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto)
+{
+	int poff = proto_ports_offset(ip_proto);
+
+	if (poff >= 0) {
+		__be32 *ports, _ports;
+
+		ports = skb_header_pointer(skb, thoff + poff,
+					   sizeof(_ports), &_ports);
+		if (ports)
+			return *ports;
+	}
+
+	return 0;
+}
+
 static bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
 {
-	int poff, nhoff = skb_network_offset(skb);
+	int nhoff = skb_network_offset(skb);
 	u8 ip_proto;
 	__be16 proto = skb->protocol;

@@ -86,6 +102,7 @@ ipv6:
 		nhoff += sizeof(struct ipv6hdr);
 		break;
 	}
+	case __constant_htons(ETH_P_8021AD):
 	case __constant_htons(ETH_P_8021Q): {
 		const struct vlan_hdr *vlan;
 		struct vlan_hdr _vlan;
@@ -161,33 +178,30 @@ ipv6:
 	}
 	case IPPROTO_IPIP:
 		goto again;
+	case IPPROTO_IPV6:
+		proto = htons(ETH_P_IPV6);
+		goto ipv6;
 	default:
 		break;
 	}

 	flow->ip_proto = ip_proto;
-	poff = proto_ports_offset(ip_proto);
-	if (poff >= 0) {
-		__be32 *ports, _ports;
-
-		nhoff += poff;
-		ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
-		if (ports)
-			flow->ports = *ports;
-	}
-
+	flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto);
 	flow->thoff = (u16) nhoff;

 	return true;
 }

 static u32 hashrnd __read_mostly;
+static __always_inline void __flow_hash_secret_init(void)
+{
+	net_get_random_once(&hashrnd, sizeof(hashrnd));
+}

-static void init_hashrnd(void)
+static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c)
 {
-	if (likely(hashrnd))
-		return;
-	get_random_bytes(&hashrnd, sizeof(hashrnd));
+	__flow_hash_secret_init();
+	return jhash_3words(a, b, c, hashrnd);
 }

 u32 __skb_get_rxhash(struct sk_buff *skb)
@@ -206,11 +220,9 @@ u32 __skb_get_rxhash(struct sk_buff *skb)
 		swap(keys.port16[0], keys.port16[1]);
 	}

-	init_hashrnd();
-
-	hash = jhash_3words((__force u32)keys.dst,
-			    (__force u32)keys.src,
-			    (__force u32)keys.ports, hashrnd);
+	hash = __flow_hash_3words((__force u32)keys.dst,
+				  (__force u32)keys.src,
+				  (__force u32)keys.ports);
 	if (!hash)
 		hash = 1;

diff --git a/datapath/linux/compat/include/linux/jump_label.h b/datapath/linux/compat/include/linux/jump_label.h
new file mode 100644
index 0000000..8eaf776
--- /dev/null
+++ b/datapath/linux/compat/include/linux/jump_label.h
@@ -0,0 +1,79 @@
+#ifndef _LINUX_JUMP_LABEL_WRAPPER_H
+#define _LINUX_JUMP_LABEL_WRAPPER_H
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,0)
+#include_next<linux/jump_label.h>
+#else
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,0)
+#include_next<linux/jump_label.h>
+#endif /* 3.0.0 */
+
+#ifdef HAVE_JUMP_LABEL
+struct static_key {
+	atomic_t enabled;
+/* Set lsb bit to 1 if branch is default true, 0 ot */
+	struct jump_entry *entries;
+};
+
+#define STATIC_KEY_INIT_FALSE ((struct static_key) \
+	{ .enabled = ATOMIC_INIT(0), .entries = (void *)0 })
+
+static __always_inline bool static_key_false(struct static_key *key)
+{
+	return arch_static_branch(key);
+}
+
+static __always_inline bool static_key_true(struct static_key *key)
+{
+	return !static_key_false(key);
+}
+
+static inline void static_key_slow_inc(struct static_key *key)
+{
+	if (atomic_inc_not_zero(&key->enabled))
+		return;
+
+	jump_label_lock();
+	if (atomic_read(&key->enabled) == 0) {
+		if (!jump_label_get_branch_default(key))
+			jump_label_update(key, JUMP_LABEL_ENABLE);
+		else
+			jump_label_update(key, JUMP_LABEL_DISABLE);
+	}
+	atomic_inc(&key->enabled);
+	jump_label_unlock();
+}
+
+#else
+
+/* !HAVE_JUMP_LABEL */
+struct static_key {
+	atomic_t enabled;
+};
+
+#define STATIC_KEY_INIT_FALSE ((struct static_key) \
+	{ .enabled = ATOMIC_INIT(0) })
+
+static __always_inline bool static_key_true(struct static_key *key)
+{
+	if (likely(atomic_read(&key->enabled)) > 0)
+		return true;
+	return false;
+}
+
+static inline void static_key_slow_inc(struct static_key *key)
+{
+	atomic_inc(&key->enabled);
+}
+
+#endif /* HAVE_JUMP_LABEL */
+
+#define jump_label_enabled static_key_enabled
+static inline bool static_key_enabled(struct static_key *key)
+{
+	return (atomic_read(&key->enabled) > 0);
+}
+#endif /* 3.4.0 */
+
+#endif /* _LINUX_JUMP_LABEL_H */
diff --git a/datapath/linux/compat/include/linux/net.h b/datapath/linux/compat/include/linux/net.h
index 5665e2e..5f01b0d 100644
--- a/datapath/linux/compat/include/linux/net.h
+++ b/datapath/linux/compat/include/linux/net.h
@@ -2,6 +2,7 @@
 #define __LINUX_NET_WRAPPER_H 1

 #include_next <linux/net.h>
+#include <linux/jump_label.h>

 #ifndef net_ratelimited_function
 #define net_ratelimited_function(function, ...)			\
@@ -28,4 +29,30 @@ do {								\
 	net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__)
 #endif

+#ifndef net_get_random_once
+bool __net_get_random_once(void *buf, int nbytes, bool *done,
+			   struct static_key *done_key);
+
+#ifdef HAVE_JUMP_LABEL
+#define ___NET_RANDOM_STATIC_KEY_INIT ((struct static_key) \
+	{ .enabled = ATOMIC_INIT(0), .entries = (void *)1 })
+#else /* !HAVE_JUMP_LABEL */
+#define ___NET_RANDOM_STATIC_KEY_INIT STATIC_KEY_INIT_FALSE
+#endif /* HAVE_JUMP_LABEL */
+
+#define net_get_random_once(buf, nbytes)			\
+({								\
+	bool ___ret = false;					\
+	static bool ___done = false;				\
+	static struct static_key ___done_key =			\
+		___NET_RANDOM_STATIC_KEY_INIT;			\
+	if (!static_key_true(&___done_key))			\
+		___ret = __net_get_random_once(buf,		\
+					       nbytes,		\
+					       &___done,	\
+					       &___done_key);	\
+	___ret;							\
+})
+#endif
+
 #endif
diff --git a/datapath/linux/compat/include/linux/skbuff.h b/datapath/linux/compat/include/linux/skbuff.h
index 9868a98..3af3ddc 100644
--- a/datapath/linux/compat/include/linux/skbuff.h
+++ b/datapath/linux/compat/include/linux/skbuff.h
@@ -213,12 +213,16 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
 }
 #endif

-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
+#define __skb_get_rxhash rpl__skb_get_rxhash
+#define skb_get_rxhash rpl_skb_get_rxhash
+
 extern u32 __skb_get_rxhash(struct sk_buff *skb);
 static inline __u32 skb_get_rxhash(struct sk_buff *skb)
 {
 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34)
-	if (!skb->rxhash)
+	if (skb->rxhash)
+		return skb->rxhash;
 #endif
 	return __skb_get_rxhash(skb);
 }
diff --git a/datapath/linux/compat/utils.c b/datapath/linux/compat/utils.c
index 844d372..a2ff7d1 100644
--- a/datapath/linux/compat/utils.c
+++ b/datapath/linux/compat/utils.c
@@ -37,3 +37,51 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
 				csum_unfold(*sum)));
 }
 #endif
+
+struct __net_random_once_work {
+	struct work_struct work;
+	struct static_key *key;
+};
+
+static void __net_random_once_deferred(struct work_struct *w)
+{
+	struct __net_random_once_work *work =
+		container_of(w, struct __net_random_once_work, work);
+	if (!static_key_enabled(work->key))
+		static_key_slow_inc(work->key);
+	kfree(work);
+}
+
+static void __net_random_once_disable_jump(struct static_key *key)
+{
+	struct __net_random_once_work *w;
+
+	w = kmalloc(sizeof(*w), GFP_ATOMIC);
+	if (!w)
+		return;
+
+	INIT_WORK(&w->work, __net_random_once_deferred);
+	w->key = key;
+	schedule_work(&w->work);
+}
+
+bool __net_get_random_once(void *buf, int nbytes, bool *done,
+			   struct static_key *done_key)
+{
+	static DEFINE_SPINLOCK(lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&lock, flags);
+	if (*done) {
+		spin_unlock_irqrestore(&lock, flags);
+		return false;
+	}
+
+	get_random_bytes(buf, nbytes);
+	*done = true;
+	spin_unlock_irqrestore(&lock, flags);
+
+	__net_random_once_disable_jump(done_key);
+
+	return true;
+}
-- 
1.7.1

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev