Certain system process significant unconnected UDP workload. It would be preferrable to disable UDP early demux for those systems and enable it for TCP only.
Signed-off-by: Subash Abhinov Kasiviswanathan <subas...@codeaurora.org> Suggested-by: Eric Dumazet <eduma...@google.com> --- include/net/netns/ipv4.h | 2 ++ include/net/protocol.h | 3 ++- net/ipv4/af_inet.c | 9 ++++++--- net/ipv4/ip_input.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 14 ++++++++++++++ net/ipv6/ip6_input.c | 2 +- net/ipv6/tcp_ipv6.c | 3 ++- 7 files changed, 28 insertions(+), 7 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 0378e88..1e74da23 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -86,6 +86,8 @@ struct netns_ipv4 { /* Shall we try to damage output packets if routing dev changes? */ int sysctl_ip_dynaddr; int sysctl_ip_early_demux; + int sysctl_tcp_early_demux; + int sysctl_udp_early_demux; int sysctl_fwmark_reflect; int sysctl_tcp_fwmark_accept; diff --git a/include/net/protocol.h b/include/net/protocol.h index bf36ca3..f8ede39 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -40,6 +40,7 @@ /* This is used to register protocols. */ struct net_protocol { void (*early_demux)(struct sk_buff *skb); + int *early_demux_enabled; int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); unsigned int no_policy:1, @@ -54,7 +55,7 @@ struct net_protocol { #if IS_ENABLED(CONFIG_IPV6) struct inet6_protocol { void (*early_demux)(struct sk_buff *skb); - + int *early_demux_enabled; int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f750698..5a1d30e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1579,7 +1579,7 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset) }; #endif -static const struct net_protocol tcp_protocol = { +static struct net_protocol tcp_protocol = { .early_demux = tcp_v4_early_demux, .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, @@ -1588,7 +1588,7 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset) .icmp_strict_tag_validation = 1, }; -static const struct net_protocol udp_protocol = { +static struct net_protocol udp_protocol = { .early_demux = udp_v4_early_demux, .handler = udp_rcv, .err_handler = udp_err, @@ -1699,7 +1699,10 @@ static __net_init int inet_init_net(struct net *net) */ net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; net->ipv4.sysctl_ip_dynaddr = 0; - net->ipv4.sysctl_ip_early_demux = 1; + net->ipv4.sysctl_udp_early_demux = 1; + net->ipv4.sysctl_tcp_early_demux = 1; + tcp_protocol.early_demux_enabled = &net->ipv4.sysctl_tcp_early_demux; + udp_protocol.early_demux_enabled = &net->ipv4.sysctl_udp_early_demux; return 0; } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d6feabb..187feae 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -329,7 +329,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) int protocol = iph->protocol; ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot && ipprot->early_demux) { + if (ipprot && ipprot->early_demux && *ipprot->early_demux_enabled) { ipprot->early_demux(skb); /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index b2fa498..b212af9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -737,6 +737,20 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, .proc_handler = proc_dointvec }, { + .procname = "udp_early_demux", + .data = &init_net.ipv4.sysctl_udp_early_demux, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_early_demux", + .data = &init_net.ipv4.sysctl_tcp_early_demux, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { .procname = "ip_default_ttl", .data = &init_net.ipv4.sysctl_ip_default_ttl, .maxlen = sizeof(int), diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index aacfb4b..b34f737 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -60,7 +60,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) const struct inet6_protocol *ipprot; ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); - if (ipprot && ipprot->early_demux) + if (ipprot && ipprot->early_demux && *ipprot->early_demux_enabled) ipprot->early_demux(skb); } if (!skb_valid_dst(skb)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4c60c6f..fb73a41 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1926,7 +1926,7 @@ struct proto tcpv6_prot = { .diag_destroy = tcp_abort, }; -static const struct inet6_protocol tcpv6_protocol = { +static struct inet6_protocol tcpv6_protocol = { .early_demux = tcp_v6_early_demux, .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, @@ -1944,6 +1944,7 @@ struct proto tcpv6_prot = { static int __net_init tcpv6_net_init(struct net *net) { + tcpv6_protocol.early_demux_enabled = &net->ipv4.sysctl_tcp_early_demux; return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, SOCK_RAW, IPPROTO_TCP, net); } -- 1.9.1