On Fri, Dec 1, 2017 at 3:31 PM, Shaohua Li <s...@kernel.org> wrote: > From: Shaohua Li <s...@fb.com> > > Currently, if there is negative routing, we change the sock's txhash, so the > sock will have a different flowlabel and route to a different path. > According to Tom, we had better have an option to enable this, because some > routers require a consistent flowlabel. By default, we maintain a consistent > flowlabel, i.e., negative routing doesn't change the flowlabel. > > Suggested-by: Tom Herbert <t...@herbertland.com> > Signed-off-by: Shaohua Li <s...@fb.com> > --- > Documentation/networking/ip-sysctl.txt | 7 +++++++ > include/net/netns/ipv6.h | 1 + > include/net/sock.h | 28 +++++++++++++++------------- > net/ipv6/af_inet6.c | 1 + > net/ipv6/sysctl_net_ipv6.c | 8 ++++++++ > 5 files changed, 32 insertions(+), 13 deletions(-) > > diff --git a/Documentation/networking/ip-sysctl.txt > b/Documentation/networking/ip-sysctl.txt > index 46c7e10..14132a0 100644 > --- a/Documentation/networking/ip-sysctl.txt > +++ b/Documentation/networking/ip-sysctl.txt > @@ -1345,6 +1345,13 @@ auto_flowlabels - INTEGER > be disabled by the socket option > Default: 1 > > +consistent_auto_flowlabel - BOOLEAN
I think we should call it consistent_txhash since this isn't just about the flow label. > + When auto_flowlabels is enabled, this option makes socket flowlabel > + consistent in the lifetime. > + TRUE: enabled > + FALSE: disabled > + Default: TRUE > + > flowlabel_state_ranges - BOOLEAN > Split the flow label number space into two ranges. 0-0x7FFFF is > reserved for the IPv6 flow manager facility, 0x80000-0xFFFFF > diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h > index 987cc45..e55f851 100644 > --- a/include/net/netns/ipv6.h > +++ b/include/net/netns/ipv6.h > @@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 { > int ip6_rt_min_advmss; > int flowlabel_consistency; > int auto_flowlabels; > + int consistent_auto_flowlabel; > int icmpv6_time; > int anycast_src_echo_reply; > int ip_nonlocal_bind; > diff --git a/include/net/sock.h b/include/net/sock.h > index b9cb9d2..45e868f 100644 > --- a/include/net/sock.h > +++ b/include/net/sock.h > @@ -1729,6 +1729,18 @@ static inline kuid_t sock_net_uid(const struct net > *net, const struct sock *sk) > return sk ? 
sk->sk_uid : make_kuid(net->user_ns, 0); > } > > +static inline > +struct net *sock_net(const struct sock *sk) > +{ > + return read_pnet(&sk->sk_net); > +} > + > +static inline > +void sock_net_set(struct sock *sk, struct net *net) > +{ > + write_pnet(&sk->sk_net, net); > +} > + > static inline void sk_set_txhash(struct sock *sk, u32 hash) > { > sk->sk_txhash = hash; > @@ -1736,7 +1748,9 @@ static inline void sk_set_txhash(struct sock *sk, u32 > hash) > > static inline void sk_rethink_txhash(struct sock *sk) > { > - if (sk->sk_txhash) { > + struct net *net = sock_net(sk); > + > + if (sk->sk_txhash && !net->ipv6.sysctl.consistent_auto_flowlabel) { > u32 v = prandom_u32(); > sk->sk_txhash = v ?: 1; > } > @@ -2291,18 +2305,6 @@ static inline void sk_eat_skb(struct sock *sk, struct > sk_buff *skb) > __kfree_skb(skb); > } > > -static inline > -struct net *sock_net(const struct sock *sk) > -{ > - return read_pnet(&sk->sk_net); > -} > - > -static inline > -void sock_net_set(struct sock *sk, struct net *net) > -{ > - write_pnet(&sk->sk_net, net); > -} > - > static inline struct sock *skb_steal_sock(struct sk_buff *skb) > { > if (skb->sk) { > diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c > index c26f712..fe9b312 100644 > --- a/net/ipv6/af_inet6.c > +++ b/net/ipv6/af_inet6.c > @@ -807,6 +807,7 @@ static int __net_init inet6_net_init(struct net *net) > net->ipv6.sysctl.icmpv6_time = 1*HZ; > net->ipv6.sysctl.flowlabel_consistency = 1; > net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS; > + net->ipv6.sysctl.consistent_auto_flowlabel = 1; > net->ipv6.sysctl.idgen_retries = 3; > net->ipv6.sysctl.idgen_delay = 1 * HZ; > net->ipv6.sysctl.flowlabel_state_ranges = 0; > diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c > index a789a8a..8908092 100644 > --- a/net/ipv6/sysctl_net_ipv6.c > +++ b/net/ipv6/sysctl_net_ipv6.c > @@ -126,6 +126,13 @@ static struct ctl_table ipv6_table_template[] = { > .mode = 0644, > .proc_handler = proc_dointvec > }, 
> + { > + .procname = "consistent_auto_flowlabel", > + .data = > &init_net.ipv6.sysctl.consistent_auto_flowlabel, > + .maxlen = sizeof(int), > + .mode = 0644, > + .proc_handler = proc_dointvec > + }, > { } > }; > > @@ -190,6 +197,7 @@ static int __net_init ipv6_sysctl_net_init(struct net > *net) > ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt; > ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len; > ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len; > + ipv6_table[14].data = &net->ipv6.sysctl.consistent_auto_flowlabel; > > ipv6_route_table = ipv6_route_sysctl_init(net); > if (!ipv6_route_table) > -- > 2.9.5 >