On Mon, Jan 28, 2019 at 2:51 AM Steffen Klassert
<steffen.klass...@secunet.com> wrote:
>
> This patch extends UDP GRO to support fraglist GRO/GSO
> by using the previously introduced infrastructure.
> All UDP packets that are not targeted to a GRO capable
> UDP sockets are going to fraglist GRO now (local input
> and forward).
> ---
>  net/ipv4/udp_offload.c | 45 ++++++++++++++++++++++++++++++++++++++----
>  net/ipv6/udp_offload.c |  9 +++++++++
>  2 files changed, 50 insertions(+), 4 deletions(-)
>
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 584635db9231..c0be33216750 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -188,6 +188,22 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
>  }
>  EXPORT_SYMBOL(skb_udp_tunnel_segment);
>
> +static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
> +                                             netdev_features_t features)
> +{
> +       unsigned int mss = skb_shinfo(skb)->gso_size;
> +
> +       skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
> +       if (IS_ERR(skb))
> +               return skb;
> +
> +       udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
> +       skb->ip_summed = CHECKSUM_NONE;
> +       skb->csum_valid = 1;

csum_valid is only used on ingress. Hardcoding CHECKSUM_NONE is
probably fine as long as this function is only used for forwarding,
assuming we don't care about verifying checksums in the forwarding
case. But this is fragile if we ever add local list segment output.

Should we convert the checksum field in skb_forward_csum instead of
at the GSO layer, just as for forwarding of non-list skbs? Though
that would require traversing the list yet another time. Another
option is to do this conversion already when building the list in
GRO.

The comment also applies to the same logic in skb_segment_list. As a
matter of fact, even if this belongs in GSO instead of core
forwarding or GRO, then probably both the list head and the
frag_list skbs should be set in the same function, i.e.,
skb_segment_list (rough sketch at the bottom of this mail).

> +
> +       return skb;
> +}
> +
>  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
>                                   netdev_features_t features)
>  {
> @@ -200,6 +216,9 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
>         __sum16 check;
>         __be16 newlen;
>
> +       if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
> +               return __udp_gso_segment_list(gso_skb, features);
> +
>         mss = skb_shinfo(gso_skb)->gso_size;
>         if (gso_skb->len <= sizeof(*uh) + mss)
>                 return ERR_PTR(-EINVAL);
> @@ -352,16 +371,15 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>         struct sk_buff *pp = NULL;
>         struct udphdr *uh2;
>         struct sk_buff *p;
> +       int ret;
>
>         /* requires non zero csum, for symmetry with GSO */
>         if (!uh->check) {
>                 NAPI_GRO_CB(skb)->flush = 1;
>                 return NULL;
>         }
> -

Accidental whitespace removal?

>         /* pull encapsulating udp header */
>         skb_gro_pull(skb, sizeof(struct udphdr));
> -       skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
>
>         list_for_each_entry(p, head, list) {
>                 if (!NAPI_GRO_CB(p)->same_flow)
> @@ -379,8 +397,17 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>          * Under small packet flood GRO count could elsewhere grow a lot
>          * leading to execessive truesize values
>          */
> -       if (!skb_gro_receive(p, skb) &&
> -           NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
> +       if (NAPI_GRO_CB(skb)->is_flist) {
> +               if (!pskb_may_pull(skb, skb_gro_offset(skb)))
> +                       return NULL;
> +               ret = skb_gro_receive_list(p, skb);
> +       } else {
> +               skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
> +
> +               ret = skb_gro_receive(p, skb);
> +       }
> +
> +       if (!ret && NAPI_GRO_CB(p)->count > UDP_GRO_CNT_MAX)
>                 pp = p;
>         else if (uh->len != uh2->len)
>                 pp = p;
> @@ -402,6 +429,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
>         int flush = 1;
>
>         if (!sk || !udp_sk(sk)->gro_receive) {
> +               NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;

This updates the choice of whether to use a list on each received
skb, which is problematic, as the socket can flip the setsockopt in
between packets.

Actually, there no longer is a need for a socket lookup for each skb
at all. We now always apply GRO; previously the lookup was needed to
decide whether to apply it. And if a matching flow is found in the
GRO table, the choice to use a list is already stored there.
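
If the per-packet decision has to stay, one option might be to latch
the choice from the existing GRO table entry once a same_flow match
is found, so that only the first packet of a flow consults the
socket. Completely untested sketch, just to illustrate the idea;
udp_gro_latch_flist is a made-up name and is_flist the field this
patch adds to napi_gro_cb:

static void udp_gro_latch_flist(struct list_head *head, struct sk_buff *skb)
{
        struct sk_buff *p;

        list_for_each_entry(p, head, list) {
                if (!NAPI_GRO_CB(p)->same_flow)
                        continue;

                /* flow already in the table: reuse its stored choice,
                 * so a setsockopt in between packets cannot flip the
                 * decision mid-flow
                 */
                NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
                return;
        }
}
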
>                 pp = call_gro_receive(udp_gro_receive_segment, head, skb);
>                 return pp;
>         }
> @@ -530,6 +558,15 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
>         const struct iphdr *iph = ip_hdr(skb);
>         struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
>
> +       if (NAPI_GRO_CB(skb)->is_flist) {
> +               uh->len = htons(skb->len - nhoff);
> +
> +               skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
> +               skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
> +
> +               return 0;
> +       }
> +
>         if (uh->check)
>                 uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
>                                           iph->daddr, 0);
> diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
> index 5f7937a4f71a..7c3f28310baa 100644
> --- a/net/ipv6/udp_offload.c
> +++ b/net/ipv6/udp_offload.c
> @@ -154,6 +154,15 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
>         const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
>         struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
>
> +       if (NAPI_GRO_CB(skb)->is_flist) {
> +               uh->len = htons(skb->len - nhoff);
> +
> +               skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
> +               skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
> +
> +               return 0;
> +       }
> +
>         if (uh->check)
>                 uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
>                                           &ipv6h->daddr, 0);
> --
> 2.17.1
>
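
To illustrate the skb_segment_list point at the top of this mail:
the checksum state of the list head and of all frag_list skbs could
be converted in one place, e.g. at the end of skb_segment_list,
instead of being split between that function and
__udp_gso_segment_list. Again completely untested, and
skb_list_set_checksum is a made-up name:

static void skb_list_set_checksum(struct sk_buff *head)
{
        struct sk_buff *nskb;

        /* after skb_segment_list() the frag_list skbs have been
         * chained to head via skb->next, so this covers both the
         * head and all segments
         */
        for (nskb = head; nskb; nskb = nskb->next) {
                nskb->ip_summed = CHECKSUM_NONE;
                nskb->csum_valid = 1;
        }
}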