From: Shaohua Li <s...@fb.com> Currently, if there is negative routing, we change the sock's txhash, so the sock gets a different flowlabel and its traffic is routed along a different path. According to Tom, it is better to make this behavior optional, because some routers require the flowlabel to stay consistent. By default, we keep the flowlabel consistent, i.e., negative routing doesn't change the flowlabel.
Suggested-by: Tom Herbert <t...@herbertland.com> Signed-off-by: Shaohua Li <s...@fb.com> --- Documentation/networking/ip-sysctl.txt | 7 +++++++ include/net/netns/ipv6.h | 1 + include/net/sock.h | 28 +++++++++++++++------------- net/ipv6/af_inet6.c | 1 + net/ipv6/sysctl_net_ipv6.c | 8 ++++++++ 5 files changed, 32 insertions(+), 13 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 46c7e10..14132a0 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1345,6 +1345,13 @@ auto_flowlabels - INTEGER be disabled by the socket option Default: 1 +consistent_auto_flowlabel - BOOLEAN + When auto_flowlabels is enabled, this option makes socket flowlabel + consistent in the lifetime. + TRUE: enabled + FALSE: disabled + Default: TRUE + flowlabel_state_ranges - BOOLEAN Split the flow label number space into two ranges. 0-0x7FFFF is reserved for the IPv6 flow manager facility, 0x80000-0xFFFFF diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 987cc45..e55f851 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_min_advmss; int flowlabel_consistency; int auto_flowlabels; + int consistent_auto_flowlabel; int icmpv6_time; int anycast_src_echo_reply; int ip_nonlocal_bind; diff --git a/include/net/sock.h b/include/net/sock.h index b9cb9d2..45e868f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1729,6 +1729,18 @@ static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) return sk ? 
sk->sk_uid : make_kuid(net->user_ns, 0); } +static inline +struct net *sock_net(const struct sock *sk) +{ + return read_pnet(&sk->sk_net); +} + +static inline +void sock_net_set(struct sock *sk, struct net *net) +{ + write_pnet(&sk->sk_net, net); +} + static inline void sk_set_txhash(struct sock *sk, u32 hash) { sk->sk_txhash = hash; @@ -1736,7 +1748,9 @@ static inline void sk_set_txhash(struct sock *sk, u32 hash) static inline void sk_rethink_txhash(struct sock *sk) { - if (sk->sk_txhash) { + struct net *net = sock_net(sk); + + if (sk->sk_txhash && !net->ipv6.sysctl.consistent_auto_flowlabel) { u32 v = prandom_u32(); sk->sk_txhash = v ?: 1; } @@ -2291,18 +2305,6 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); } -static inline -struct net *sock_net(const struct sock *sk) -{ - return read_pnet(&sk->sk_net); -} - -static inline -void sock_net_set(struct sock *sk, struct net *net) -{ - write_pnet(&sk->sk_net, net); -} - static inline struct sock *skb_steal_sock(struct sk_buff *skb) { if (skb->sk) { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c26f712..fe9b312 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -807,6 +807,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.flowlabel_consistency = 1; net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS; + net->ipv6.sysctl.consistent_auto_flowlabel = 1; net->ipv6.sysctl.idgen_retries = 3; net->ipv6.sysctl.idgen_delay = 1 * HZ; net->ipv6.sysctl.flowlabel_state_ranges = 0; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index a789a8a..8908092 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -126,6 +126,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "consistent_auto_flowlabel", + .data = &init_net.ipv6.sysctl.consistent_auto_flowlabel, + .maxlen = sizeof(int), + 
.mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -190,6 +197,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt; ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len; ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len; + ipv6_table[14].data = &net->ipv6.sysctl.consistent_auto_flowlabel; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) -- 2.9.5