This is the RX counterpart of commit bec1f6f69736 ("udp: generate gso
with UDP_SEGMENT"). When UDP_SEGMENT is enabled, such a socket is also
eligible for GRO in the rx path: UDP segments directed to such a socket
are assembled into a larger GSO_UDP_L4 packet.

The core UDP GRO support is enabled/updated on setsockopt(UDP_SEGMENT) and
disabled, if needed, at socket destruction time.

Initial benchmark numbers:

Before:
udp rx:   1079 MB/s   769065 calls/s

After:
udp rx:   1466 MB/s    24877 calls/s

This change introduces a side effect with respect to UDP tunnels:
after a UDP tunnel is created, the kernel now performs a lookup per ingress
UDP packet; before, such a lookup happened only if the ingress packet carried
a valid internal header csum.

Signed-off-by: Paolo Abeni <pab...@redhat.com>
---
 include/linux/udp.h    |   2 +-
 net/ipv4/udp.c         |   1 +
 net/ipv4/udp_offload.c | 107 +++++++++++++++++++++++++++++++++--------
 net/ipv6/udp_offload.c |   6 +--
 4 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 56a321a55ba1..27dea956ef6e 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -128,7 +128,7 @@ static inline bool udp_get_gro_in_use(struct sock *sk)
 
 static inline bool udp_want_gro(struct sock *sk)
 {
-       return udp_sk(sk)->gro_receive;
+       return udp_sk(sk)->gro_receive || udp_sk(sk)->gso_size;
 }
 
 #define udp_portaddr_for_each_entry(__sk, list) \
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5ac794230013..871ee55afd96 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2450,6 +2450,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int 
optname,
                if (val < 0 || val > USHRT_MAX)
                        return -EINVAL;
                up->gso_size = val;
+               udp_update_gro_in_use(sk, udp_want_gro(sk));
                break;
 
        /*
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 08b225adf763..4ff150bb84de 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -347,6 +347,54 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff 
*skb,
        return segs;
 }
 
+#define UDO_GRO_CNT_MAX 64
+static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
+                                              struct sk_buff *skb)
+{
+       struct udphdr *uh = udp_hdr(skb);
+       struct sk_buff *pp = NULL;
+       struct udphdr *uh2;
+       struct sk_buff *p;
+
+       /* requires non zero csum, for symmetry with GSO */
+       if (!uh->check) {
+               NAPI_GRO_CB(skb)->flush = 1;
+               return NULL;
+       }
+
+       /* pull encapsulating udp header */
+       skb_gro_pull(skb, sizeof(struct udphdr));
+       skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+
+       list_for_each_entry(p, head, list) {
+               if (!NAPI_GRO_CB(p)->same_flow)
+                       continue;
+
+               uh2 = udp_hdr(p);
+
+               /* Match ports only, as csum is always non zero */
+               if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
+                       NAPI_GRO_CB(p)->same_flow = 0;
+                       continue;
+               }
+
+               /* Terminate the flow on len mismatch or if it grows "too much".
+                * Under small packet flood GRO count could otherwise grow a lot
+                * leading to excessive truesize values
+                */
+               if (!skb_gro_receive(p, skb) &&
+                   NAPI_GRO_CB(p)->count > UDO_GRO_CNT_MAX)
+                       pp = p;
+               else if (uh->len != uh2->len)
+                       pp = p;
+
+               return pp;
+       }
+
+       /* mismatch, but we never need to flush */
+       return NULL;
+}
+
 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
                                struct udphdr *uh, udp_lookup_t lookup)
 {
@@ -357,23 +405,29 @@ struct sk_buff *udp_gro_receive(struct list_head *head, 
struct sk_buff *skb,
        int flush = 1;
        struct sock *sk;
 
+       rcu_read_lock();
+       sk = (*lookup)(skb, uh->source, uh->dest);
+       if (!sk)
+               goto out_unlock;
+
+       if (udp_sk(sk)->gso_size) {
+               pp = call_gro_receive(udp_gro_receive_segment, head, skb);
+               rcu_read_unlock();
+               return pp;
+       }
+
        if (NAPI_GRO_CB(skb)->encap_mark ||
            (skb->ip_summed != CHECKSUM_PARTIAL &&
             NAPI_GRO_CB(skb)->csum_cnt == 0 &&
             !NAPI_GRO_CB(skb)->csum_valid))
-               goto out;
+               goto out_unlock;
 
        /* mark that this skb passed once through the tunnel gro layer */
        NAPI_GRO_CB(skb)->encap_mark = 1;
 
-       rcu_read_lock();
-       sk = (*lookup)(skb, uh->source, uh->dest);
-
-       if (sk && udp_sk(sk)->gro_receive)
-               goto unflush;
-       goto out_unlock;
+       if (!udp_sk(sk)->gro_receive)
+               goto out_unlock;
 
-unflush:
        flush = 0;
 
        list_for_each_entry(p, head, list) {
@@ -398,7 +452,6 @@ struct sk_buff *udp_gro_receive(struct list_head *head, 
struct sk_buff *skb,
 
 out_unlock:
        rcu_read_unlock();
-out:
        skb_gro_flush_final(skb, pp, flush);
        return pp;
 }
@@ -431,6 +484,19 @@ static struct sk_buff *udp4_gro_receive(struct list_head 
*head,
        return NULL;
 }
 
+static int udp_gro_complete_segment(struct sk_buff *skb)
+{
+       struct udphdr *uh = udp_hdr(skb);
+
+       skb->csum_start = (unsigned char *)uh - skb->head;
+       skb->csum_offset = offsetof(struct udphdr, check);
+       skb->ip_summed = CHECKSUM_PARTIAL;
+
+       skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+       skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+       return 0;
+}
+
 int udp_gro_complete(struct sk_buff *skb, int nhoff,
                     udp_lookup_t lookup)
 {
@@ -441,16 +507,21 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
 
        uh->len = newlen;
 
-       /* Set encapsulation before calling into inner gro_complete() functions
-        * to make them set up the inner offsets.
-        */
-       skb->encapsulation = 1;
-
        rcu_read_lock();
        sk = (*lookup)(skb, uh->source, uh->dest);
-       if (sk && udp_sk(sk)->gro_complete)
+       if (sk && udp_sk(sk)->gso_size) {
+               err = udp_gro_complete_segment(skb);
+       } else if (sk && udp_sk(sk)->gro_complete) {
+               skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
+                                       : SKB_GSO_UDP_TUNNEL;
+
+               /* Set encapsulation before calling into inner gro_complete()
+                * functions to make them set up the inner offsets.
+                */
+               skb->encapsulation = 1;
                err = udp_sk(sk)->gro_complete(sk, skb,
                                nhoff + sizeof(struct udphdr));
+       }
        rcu_read_unlock();
 
        if (skb->remcsum_offload)
@@ -465,13 +536,9 @@ static int udp4_gro_complete(struct sk_buff *skb, int 
nhoff)
        const struct iphdr *iph = ip_hdr(skb);
        struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-       if (uh->check) {
-               skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+       if (uh->check)
                uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
                                          iph->daddr, 0);
-       } else {
-               skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-       }
 
        return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
 }
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 1df968a3e788..f2731b7b4c75 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -147,13 +147,9 @@ static int udp6_gro_complete(struct sk_buff *skb, int 
nhoff)
        const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-       if (uh->check) {
-               skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+       if (uh->check)
                uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
                                          &ipv6h->daddr, 0);
-       } else {
-               skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-       }
 
        return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
 }
-- 
2.17.1

Reply via email to