This is the RX counterpart of commit bec1f6f69736 ("udp: generate gso with UDP_SEGMENT"). When UDP_SEGMENT is enabled, such a socket is also eligible for GRO in the rx path: UDP segments directed to it are assembled into a larger GSO_UDP_L4 packet.
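For reference, the rx side reuses the tx knob: a user space receiver becomes eligible simply by setting UDP_SEGMENT on the socket. A minimal sketch follows (error handling omitted; the helper name and the gso_size value are just illustrative, UDP_SEGMENT == 103 comes from the uapi header added by the above commit):

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/udp.h>

    #ifndef UDP_SEGMENT
    #define UDP_SEGMENT 103	/* uapi value introduced by bec1f6f69736 */
    #endif

    int udp_segment_sock(void)
    {
    	int fd = socket(AF_INET, SOCK_DGRAM, 0);
    	int gso_size = 1400;	/* illustrative; must be in (0, USHRT_MAX] */

    	/* with this patch, also marks the socket as eligible for rx GRO */
    	setsockopt(fd, SOL_UDP, UDP_SEGMENT, &gso_size, sizeof(gso_size));
    	return fd;
    }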
The core UDP GRO support is enabled/updated on setsockopt(UDP_SEGMENT) and disabled, if needed, at socket destruction time.

Initial benchmark numbers:

Before:
udp rx:   1079 MB/s   769065 calls/s

After:
udp rx:   1466 MB/s    24877 calls/s

This change introduces a side effect with respect to UDP tunnels: after a UDP tunnel is created, the kernel now performs a lookup per ingress UDP packet, while before such a lookup happened only if the ingress packet carried a valid internal header csum.

Signed-off-by: Paolo Abeni <pab...@redhat.com>
---
 include/linux/udp.h    |   2 +-
 net/ipv4/udp.c         |   1 +
 net/ipv4/udp_offload.c | 107 +++++++++++++++++++++++++++++++++--------
 net/ipv6/udp_offload.c |   6 +--
 4 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 56a321a55ba1..27dea956ef6e 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -128,7 +128,7 @@ static inline bool udp_get_gro_in_use(struct sock *sk)
 
 static inline bool udp_want_gro(struct sock *sk)
 {
-	return udp_sk(sk)->gro_receive;
+	return udp_sk(sk)->gro_receive || udp_sk(sk)->gso_size;
 }
 
 #define udp_portaddr_for_each_entry(__sk, list) \
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5ac794230013..871ee55afd96 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2450,6 +2450,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		if (val < 0 || val > USHRT_MAX)
 			return -EINVAL;
 		up->gso_size = val;
+		udp_update_gro_in_use(sk, udp_want_gro(sk));
 		break;
 
 	/*
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 08b225adf763..4ff150bb84de 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -347,6 +347,54 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 	return segs;
 }
 
+#define UDP_GRO_CNT_MAX 64
+static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
+					       struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+	struct sk_buff *pp = NULL;
+	struct udphdr *uh2;
+	struct sk_buff *p;
+
+	/* requires a non-zero csum, for symmetry with GSO */
+	if (!uh->check) {
+		NAPI_GRO_CB(skb)->flush = 1;
+		return NULL;
+	}
+
+	/* pull encapsulating udp header */
+	skb_gro_pull(skb, sizeof(struct udphdr));
+	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+
+	list_for_each_entry(p, head, list) {
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		uh2 = udp_hdr(p);
+
+		/* Match ports only, as csum is always non-zero */
+		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+
+		/* Terminate the flow on len mismatch or if it grows "too
+		 * much". Under small packet flood the GRO count could
+		 * otherwise grow a lot, leading to excessive truesize values.
+		 */
+		if (!skb_gro_receive(p, skb) &&
+		    NAPI_GRO_CB(p)->count > UDP_GRO_CNT_MAX)
+			pp = p;
+		else if (uh->len != uh2->len)
+			pp = p;
+
+		return pp;
+	}
+
+	/* mismatch, but we never need to flush */
+	return NULL;
+}
+
 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 				struct udphdr *uh, udp_lookup_t lookup)
 {
@@ -357,23 +405,29 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 	int flush = 1;
 	struct sock *sk;
 
+	rcu_read_lock();
+	sk = (*lookup)(skb, uh->source, uh->dest);
+	if (!sk)
+		goto out_unlock;
+
+	if (udp_sk(sk)->gso_size) {
+		pp = call_gro_receive(udp_gro_receive_segment, head, skb);
+		rcu_read_unlock();
+		return pp;
+	}
+
 	if (NAPI_GRO_CB(skb)->encap_mark ||
 	    (skb->ip_summed != CHECKSUM_PARTIAL &&
 	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
 	     !NAPI_GRO_CB(skb)->csum_valid))
-		goto out;
+		goto out_unlock;
 
 	/* mark that this skb passed once through the tunnel gro layer */
 	NAPI_GRO_CB(skb)->encap_mark = 1;
 
-	rcu_read_lock();
-	sk = (*lookup)(skb, uh->source, uh->dest);
-
-	if (sk && udp_sk(sk)->gro_receive)
-		goto unflush;
-	goto out_unlock;
+	if (!udp_sk(sk)->gro_receive)
+		goto out_unlock;
 
-unflush:
 	flush = 0;
 
 	list_for_each_entry(p, head, list) {
@@ -398,7 +452,6 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 
 out_unlock:
 	rcu_read_unlock();
-out:
 	skb_gro_flush_final(skb, pp, flush);
 	return pp;
 }
@@ -431,6 +484,19 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
 	return NULL;
 }
 
+static int udp_gro_complete_segment(struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+
+	skb->csum_start = (unsigned char *)uh - skb->head;
+	skb->csum_offset = offsetof(struct udphdr, check);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+
+	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+	skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+	return 0;
+}
+
 int udp_gro_complete(struct sk_buff *skb, int nhoff,
 		     udp_lookup_t lookup)
 {
@@ -441,16 +507,21 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
 
 	uh->len = newlen;
 
-	/* Set encapsulation before calling into inner gro_complete() functions
-	 * to make them set up the inner offsets.
-	 */
-	skb->encapsulation = 1;
-
 	rcu_read_lock();
 	sk = (*lookup)(skb, uh->source, uh->dest);
-	if (sk && udp_sk(sk)->gro_complete)
+	if (sk && udp_sk(sk)->gso_size) {
+		err = udp_gro_complete_segment(skb);
+	} else if (sk && udp_sk(sk)->gro_complete) {
+		skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
+					: SKB_GSO_UDP_TUNNEL;
+
+		/* Set encapsulation before calling into inner gro_complete()
+		 * functions to make them set up the inner offsets.
+		 */
+		skb->encapsulation = 1;
 		err = udp_sk(sk)->gro_complete(sk, skb,
 				nhoff + sizeof(struct udphdr));
+	}
 	rcu_read_unlock();
 
 	if (skb->remcsum_offload)
@@ -465,13 +536,9 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (uh->check) {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+	if (uh->check)
 		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
 					  iph->daddr, 0);
-	} else {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-	}
 
 	return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
 }
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 1df968a3e788..f2731b7b4c75 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -147,13 +147,9 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (uh->check) {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+	if (uh->check)
 		uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
 					  &ipv6h->daddr, 0);
-	} else {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-	}
 
 	return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
 }
-- 
2.17.1