> On Mar 14, 2017, at 5:36 PM, Nikolay Aleksandrov > <niko...@cumulusnetworks.com> wrote: > > This patch adds support for ECMP hash policy choice via a new sysctl > called fib_multipath_hash_policy and also adds support for L4 hashes. > The current values for fib_multipath_hash_policy are: > 0 - layer 3 (default) > 1 - layer 4 > If there's an skb hash already set and it matches the chosen policy then it > will be used instead of being calculated (currently only for L4). > In L3 mode we always calculate the hash due to the ICMP error special > case, the flow dissector's field consistentification should handle the > address order thus we can remove the address reversals. > > Signed-off-by: Nikolay Aleksandrov <niko...@cumulusnetworks.com> > --- > v3: > - keep the ICMP error special handling and always calc L3 hash > Jakub, could you please run your tests with this version ? > > v2: > - removed the output_key_hash as it's not needed anymore > - reverted to my original/internal patch with L3 as default hash > > Documentation/networking/ip-sysctl.txt | 8 +++ > include/net/ip_fib.h | 14 ++---- > include/net/netns/ipv4.h | 1 + > include/net/route.h | 9 +--- > net/ipv4/fib_semantics.c | 11 ++-- > net/ipv4/icmp.c | 19 +------ > net/ipv4/route.c | 92 ++++++++++++++++++++++++++-------- > net/ipv4/sysctl_net_ipv4.c | 9 ++++ > 8 files changed, 98 insertions(+), 65 deletions(-) > [snip] > /* To make ICMP packets follow the right flow, the multipath hash is > - * calculated from the inner IP addresses in reverse order. > + * calculated from the inner IP addresses. > */ > -static int ip_multipath_icmp_hash(struct sk_buff *skb) > +static void ip_multipath_icmp_hash(const struct sk_buff *skb, > + struct flowi4 *fl4) > { > const struct iphdr *outer_iph = ip_hdr(skb); > struct icmphdr _icmph; > @@ -1746,33 +1746,85 @@ static int ip_multipath_icmp_hash(struct sk_buff *skb) > struct iphdr _inner_iph; > const struct iphdr *inner_iph; > > + fl4->saddr = outer_iph->saddr; > + fl4->daddr = outer_iph->daddr; > if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) > - goto standard_hash; > + return; > > icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), > &_icmph); > if (!icmph) > - goto standard_hash; > + return; > > if (icmph->type != ICMP_DEST_UNREACH && > icmph->type != ICMP_REDIRECT && > icmph->type != ICMP_TIME_EXCEEDED && > - icmph->type != ICMP_PARAMETERPROB) { > - goto standard_hash; > - } > + icmph->type != ICMP_PARAMETERPROB) > + return; > > inner_iph = skb_header_pointer(skb, > outer_iph->ihl * 4 + sizeof(_icmph), > sizeof(_inner_iph), &_inner_iph); > if (!inner_iph) > - goto standard_hash; > + return; > + fl4->saddr = inner_iph->saddr; > + fl4->daddr = inner_iph->daddr; > +} > > - return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr); > +int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, > + const struct sk_buff *skb) > +{ > + struct net *net = fi->fib_net; > + struct flow_keys hash_keys; > + u32 mhash; > > -standard_hash: > - return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr); > -} > + switch (net->ipv4.sysctl_fib_multipath_hash_policy) { > + case 0: > + memset(&hash_keys, 0, sizeof(hash_keys)); > + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; > + if (skb && ip_hdr(skb)->protocol == IPPROTO_ICMP) { > + struct flowi4 _fl4; > > + ip_multipath_icmp_hash(skb, &_fl4);
Ugh, obviously I could’ve just passed hash_keys here, will change this in v4 but I’ll wait to see if there aren’t any other comments or issues. Thanks, Nik > + hash_keys.addrs.v4addrs.src = _fl4.saddr; > + hash_keys.addrs.v4addrs.dst = _fl4.daddr; > + } else { > + hash_keys.addrs.v4addrs.src = fl4->saddr; > + hash_keys.addrs.v4addrs.dst = fl4->daddr; > + } > + break; > + case 1: > + /* skb is currently provided only when forwarding */ > + if (skb) { > + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; > + struct flow_keys keys; > + > + /* short-circuit if we already have L4 hash present */ > + if (skb->l4_hash) > + return skb_get_hash_raw(skb) >> 1; > + memset(&hash_keys, 0, sizeof(hash_keys)); > + skb_flow_dissect_flow_keys(skb, &keys, flag); > + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; > + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; > + hash_keys.ports.src = keys.ports.src; > + hash_keys.ports.dst = keys.ports.dst; > + hash_keys.basic.ip_proto = keys.basic.ip_proto; > + } else { > + memset(&hash_keys, 0, sizeof(hash_keys)); > + hash_keys.control.addr_type = > FLOW_DISSECTOR_KEY_IPV4_ADDRS; > + hash_keys.addrs.v4addrs.src = fl4->saddr; > + hash_keys.addrs.v4addrs.dst = fl4->daddr; > + hash_keys.ports.src = fl4->fl4_sport; > + hash_keys.ports.dst = fl4->fl4_dport; > + hash_keys.basic.ip_proto = fl4->flowi4_proto; > + } > + break; > + } > + mhash = flow_hash_from_keys(&hash_keys); > + > + return mhash >> 1; > +} > +EXPORT_SYMBOL_GPL(fib_multipath_hash); > #endif /* CONFIG_IP_ROUTE_MULTIPATH */