This is the RX counterpart of commit bec1f6f69736 ("udp: generate gso
with UDP_SEGMENT"). When UDP_GRO is enabled on a socket, that socket is
also eligible for GRO in the rx path: UDP segments directed to it are
assembled into a larger GSO_UDP_L4 packet.

The core UDP GRO support is enabled with setsockopt(UDP_GRO).

Initial benchmark numbers:

Before:
udp rx:   1079 MB/s   769065 calls/s

After:
udp rx:   1466 MB/s    24877 calls/s

This change introduces a side effect with respect to UDP tunnels:
after a UDP tunnel is created, the kernel now performs a socket lookup for
every ingress UDP packet, while before such a lookup happened only if the
ingress packet carried a valid internal header csum.

v1 -> v2:
 - use a new option to enable UDP GRO
 - use static keys to protect the UDP GRO socket lookup

Signed-off-by: Paolo Abeni <pab...@redhat.com>
---
 include/linux/udp.h      |   3 +-
 include/uapi/linux/udp.h |   1 +
 net/ipv4/udp.c           |   7 +++
 net/ipv4/udp_offload.c   | 109 +++++++++++++++++++++++++++++++--------
 net/ipv6/udp_offload.c   |   6 +--
 5 files changed, 98 insertions(+), 28 deletions(-)

diff --git a/include/linux/udp.h b/include/linux/udp.h
index a4dafff407fb..f613b329852e 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -50,11 +50,12 @@ struct udp_sock {
        __u8             encap_type;    /* Is this an Encapsulation socket? */
        unsigned char    no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
                         no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
-                        encap_enabled:1; /* This socket enabled encap
+                        encap_enabled:1, /* This socket enabled encap
                                           * processing; UDP tunnels and
                                           * different encapsulation layer set
                                           * this
                                           */
+                        gro_enabled:1; /* Can accept GRO packets */
        /*
         * Following member retains the information to create a UDP header
         * when the socket is uncorked.
diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index 09502de447f5..30baccb6c9c4 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -33,6 +33,7 @@ struct udphdr {
 #define UDP_NO_CHECK6_TX 101   /* Disable sending checksum for UDP6X */
 #define UDP_NO_CHECK6_RX 102   /* Disable accpeting checksum for UDP6 */
 #define UDP_SEGMENT    103     /* Set GSO segmentation size */
+#define UDP_GRO                104     /* This socket can receive UDP GRO packets */
 
 /* UDP encapsulation types */
 #define UDP_ENCAP_ESPINUDP_NON_IKE     1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9fcb5374e166..3c277378814f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -115,6 +115,7 @@
 #include "udp_impl.h"
 #include <net/sock_reuseport.h>
 #include <net/addrconf.h>
+#include <net/udp_tunnel.h>
 
 struct udp_table udp_table __read_mostly;
 EXPORT_SYMBOL(udp_table);
@@ -2459,6 +2460,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
                up->gso_size = val;
                break;
 
+       case UDP_GRO:
+               if (valbool)
+                       udp_tunnel_encap_enable(sk->sk_socket);
+               up->gro_enabled = valbool;
+               break;
+
        /*
         *      UDP-Lite's partial checksum coverage (RFC 3828).
         */
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 802f2bc00d69..d93c1e8097ba 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -343,6 +343,54 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
        return segs;
 }
 
+#define UDO_GRO_CNT_MAX 64 /* NOTE(review): "UDO" is presumably a typo for "UDP" - confirm */
+static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
+                                              struct sk_buff *skb)
+{
+       struct udphdr *uh = udp_hdr(skb);
+       struct sk_buff *pp = NULL;
+       struct udphdr *uh2;
+       struct sk_buff *p;
+
+       /* requires non zero csum, for symmetry with GSO */
+       if (!uh->check) {
+               NAPI_GRO_CB(skb)->flush = 1;
+               return NULL;
+       }
+
+       /* pull encapsulating udp header */
+       skb_gro_pull(skb, sizeof(struct udphdr));
+       skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+
+       list_for_each_entry(p, head, list) {
+               if (!NAPI_GRO_CB(p)->same_flow)
+                       continue;
+
+               uh2 = udp_hdr(p);
+
+               /* Match ports only, as csum is always non zero */
+               if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
+                       NAPI_GRO_CB(p)->same_flow = 0;
+                       continue;
+               }
+
+               /* Terminate the flow on len mismatch or if it grows "too much".
+                * Under small packet flood GRO count could otherwise grow a lot
+                * leading to excessive truesize values
+                */
+               if (!skb_gro_receive(p, skb) &&
+                   NAPI_GRO_CB(p)->count > UDO_GRO_CNT_MAX)
+                       pp = p;
+               else if (uh->len != uh2->len)
+                       pp = p;
+
+               return pp;
+       }
+
+       /* no held packet matched this one, but we never need to flush */
+       return NULL;
+}
+
 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
                                struct udphdr *uh, udp_lookup_t lookup)
 {
@@ -353,23 +401,27 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
        int flush = 1;
        struct sock *sk;
 
+       rcu_read_lock();
+       sk = (*lookup)(skb, uh->source, uh->dest);
+       if (!sk)
+               goto out_unlock;
+
+       if (udp_sk(sk)->gro_enabled) {
+               pp = call_gro_receive(udp_gro_receive_segment, head, skb);
+               rcu_read_unlock();
+               return pp;
+       }
+
        if (NAPI_GRO_CB(skb)->encap_mark ||
            (skb->ip_summed != CHECKSUM_PARTIAL &&
             NAPI_GRO_CB(skb)->csum_cnt == 0 &&
-            !NAPI_GRO_CB(skb)->csum_valid))
-               goto out;
+            !NAPI_GRO_CB(skb)->csum_valid) ||
+           !udp_sk(sk)->gro_receive)
+               goto out_unlock;
 
        /* mark that this skb passed once through the tunnel gro layer */
        NAPI_GRO_CB(skb)->encap_mark = 1;
 
-       rcu_read_lock();
-       sk = (*lookup)(skb, uh->source, uh->dest);
-
-       if (sk && udp_sk(sk)->gro_receive)
-               goto unflush;
-       goto out_unlock;
-
-unflush:
        flush = 0;
 
        list_for_each_entry(p, head, list) {
@@ -394,7 +446,6 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 
 out_unlock:
        rcu_read_unlock();
-out:
        skb_gro_flush_final(skb, pp, flush);
        return pp;
 }
@@ -427,6 +478,19 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
        return NULL;
 }
 
+static int udp_gro_complete_segment(struct sk_buff *skb)
+{      /* Tag the skb as a CHECKSUM_PARTIAL, SKB_GSO_UDP_L4 packet; always returns 0. */
+       struct udphdr *uh = udp_hdr(skb);
+
+       skb->csum_start = (unsigned char *)uh - skb->head; /* UDP header offset from skb->head */
+       skb->csum_offset = offsetof(struct udphdr, check); /* offset of 'check' within the UDP header */
+       skb->ip_summed = CHECKSUM_PARTIAL;
+
+       skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; /* segment count comes from the GRO control block */
+       skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+       return 0;
+}
+
 int udp_gro_complete(struct sk_buff *skb, int nhoff,
                     udp_lookup_t lookup)
 {
@@ -437,16 +501,21 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
 
        uh->len = newlen;
 
-       /* Set encapsulation before calling into inner gro_complete() functions
-        * to make them set up the inner offsets.
-        */
-       skb->encapsulation = 1;
-
        rcu_read_lock();
        sk = (*lookup)(skb, uh->source, uh->dest);
-       if (sk && udp_sk(sk)->gro_complete)
+       if (sk && udp_sk(sk)->gro_enabled) {
+               err = udp_gro_complete_segment(skb);
+       } else if (sk && udp_sk(sk)->gro_complete) {
+               skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
+                                       : SKB_GSO_UDP_TUNNEL;
+
+               /* Set encapsulation before calling into inner gro_complete()
+                * functions to make them set up the inner offsets.
+                */
+               skb->encapsulation = 1;
                err = udp_sk(sk)->gro_complete(sk, skb,
                                nhoff + sizeof(struct udphdr));
+       }
        rcu_read_unlock();
 
        if (skb->remcsum_offload)
@@ -461,13 +530,9 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
        const struct iphdr *iph = ip_hdr(skb);
        struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-       if (uh->check) {
-               skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+       if (uh->check)
                uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
                                          iph->daddr, 0);
-       } else {
-               skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-       }
 
        return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
 }
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 1b8e161ac527..828b2457f97b 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -147,13 +147,9 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
        const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-       if (uh->check) {
-               skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+       if (uh->check)
                uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
                                          &ipv6h->daddr, 0);
-       } else {
-               skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-       }
 
        return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
 }
-- 
2.17.2

Reply via email to