From: zhangliping <zhanglipin...@baidu.com>

In our UDP stress performance test, disabling GRO improves the rx rate
from ~2500kpps to ~2800kpps. The perf reports show where the difference
comes from:

1. GRO is enabled:
  24.23%  [kernel]  [k] udp4_lib_lookup2
   5.42%  [kernel]  [k] __memcpy
   3.87%  [kernel]  [k] fib_table_lookup
   3.76%  [kernel]  [k] __netif_receive_skb_core
   3.68%  [kernel]  [k] ip_rcv

2. GRO is disabled:
   9.66%  [kernel]  [k] udp4_lib_lookup2
   9.47%  [kernel]  [k] __memcpy
   4.75%  [kernel]  [k] fib_table_lookup
   4.71%  [kernel]  [k] __netif_receive_skb_core
   3.90%  [kernel]  [k] virtnet_poll

So if no UDP tunnel (such as VXLAN) is configured, we can skip the UDP
GRO processing entirely.

Signed-off-by: zhangliping <zhanglipin...@baidu.com>
---
 include/net/udp.h      |  2 ++
 net/ipv4/udp_offload.c |  7 +++++++
 net/ipv4/udp_tunnel.c  | 11 ++++++++++-
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/include/net/udp.h b/include/net/udp.h
index 6c759c8594e2..c503f8b06845 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -188,6 +188,8 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
 	return uh;
 }
 
+extern struct static_key_false udp_gro_needed;
+
 /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
 static inline int udp_lib_hash(struct sock *sk)
 {
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 01801b77bd0d..9cb11a833964 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -10,10 +10,14 @@
  *	UDPv4 GSO support
  */
 
+#include <linux/static_key.h>
 #include <linux/skbuff.h>
 #include <net/udp.h>
 #include <net/protocol.h>
 
+DEFINE_STATIC_KEY_FALSE(udp_gro_needed);
+EXPORT_SYMBOL_GPL(udp_gro_needed);
+
 static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	netdev_features_t features,
 	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
@@ -250,6 +254,9 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
 	int flush = 1;
 	struct sock *sk;
 
+	if (!static_branch_unlikely(&udp_gro_needed))
+		goto out;
+
 	if (NAPI_GRO_CB(skb)->encap_mark ||
 	    (skb->ip_summed != CHECKSUM_PARTIAL &&
 	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 6539ff15e9a3..4a7b3c8223c0 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -1,4 +1,5 @@
 #include <linux/module.h>
+#include <linux/static_key.h>
 #include <linux/errno.h>
 #include <linux/socket.h>
 #include <linux/udp.h>
@@ -73,6 +74,9 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
 	udp_sk(sk)->gro_complete = cfg->gro_complete;
 
 	udp_tunnel_encap_enable(sock);
+
+	if (udp_sk(sk)->gro_receive)
+		static_branch_inc(&udp_gro_needed);
 }
 EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
 
@@ -185,7 +189,12 @@ EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
 
 void udp_tunnel_sock_release(struct socket *sock)
 {
-	rcu_assign_sk_user_data(sock->sk, NULL);
+	struct sock *sk = sock->sk;
+
+	if (udp_sk(sk)->gro_receive)
+		static_branch_dec(&udp_gro_needed);
+
+	rcu_assign_sk_user_data(sk, NULL);
 	kernel_sock_shutdown(sock, SHUT_RDWR);
 	sock_release(sock);
 }
-- 
2.13.4