[RESEND PATCH net v4] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

2021-01-29 Thread Dongseok Yi
UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
forwarding. Only the header of head_skb from ip_finish_output_gso ->
skb_gso_segment is updated but following frag_skbs are not updated.

A call path skb_mac_gso_segment -> inet_gso_segment ->
udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
does not try to update UDP/IP header of the segment list but copy
only the MAC header.

Update port, addr and check of each skb of the segment list in
__udp_gso_segment_list. It covers both SNAT and DNAT.

Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
Signed-off-by: Dongseok Yi 
Acked-by: Steffen Klassert 
---
v1:
Steffen Klassert said, there could be 2 options.
https://lore.kernel.org/patchwork/patch/1362257/
I was trying to write a quick fix, but it was not easy to forward
segmented list. Currently, assuming DNAT only.

v2:
Per Steffen Klassert request, moved the procedure from
udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.

v3:
Per Steffen Klassert request, applied fast return by comparing seg
and seg->next at the beginning of __udpv4_gso_segment_list_csum.

Fixed uh->dest = *newport and iph->daddr = *newip to
*oldport = *newport and *oldip = *newip.

v4:
Clear "Changes Requested" mark in
https://patchwork.kernel.org/project/netdevbpf

Simplified the return statement in __udp_gso_segment_list.

 include/net/udp.h  |  2 +-
 net/ipv4/udp_offload.c | 69 ++
 net/ipv6/udp_offload.c |  2 +-
 3 files changed, 66 insertions(+), 7 deletions(-)

diff --git a/include/net/udp.h b/include/net/udp.h
index 877832b..01351ba 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, 
struct sk_buff *skb,
 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
 
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
- netdev_features_t features);
+ netdev_features_t features, bool is_ipv6);
 
 static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
 {
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index ff39e94..cfc8726 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -187,8 +187,67 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(skb_udp_tunnel_segment);
 
+static void __udpv4_gso_segment_csum(struct sk_buff *seg,
+__be32 *oldip, __be32 *newip,
+__be16 *oldport, __be16 *newport)
+{
+   struct udphdr *uh;
+   struct iphdr *iph;
+
+   if (*oldip == *newip && *oldport == *newport)
+   return;
+
+   uh = udp_hdr(seg);
+   iph = ip_hdr(seg);
+
+   if (uh->check) {
+   inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip,
+true);
+   inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport,
+false);
+   if (!uh->check)
+   uh->check = CSUM_MANGLED_0;
+   }
+   *oldport = *newport;
+
+   csum_replace4(&iph->check, *oldip, *newip);
+   *oldip = *newip;
+}
+
+static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
+{
+   struct sk_buff *seg;
+   struct udphdr *uh, *uh2;
+   struct iphdr *iph, *iph2;
+
+   seg = segs;
+   uh = udp_hdr(seg);
+   iph = ip_hdr(seg);
+
+   if ((udp_hdr(seg)->dest == udp_hdr(seg->next)->dest) &&
+   (udp_hdr(seg)->source == udp_hdr(seg->next)->source) &&
+   (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) &&
+   (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr))
+   return segs;
+
+   while ((seg = seg->next)) {
+   uh2 = udp_hdr(seg);
+   iph2 = ip_hdr(seg);
+
+   __udpv4_gso_segment_csum(seg,
+&iph2->saddr, &iph->saddr,
+&uh2->source, &uh->source);
+   __udpv4_gso_segment_csum(seg,
+&iph2->daddr, &iph->daddr,
+&uh2->dest, &uh->dest);
+   }
+
+   return segs;
+}
+
 static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
- netdev_features_t features)
+ netdev_features_t features,
+ bool is_ipv6)
 {
unsigned int mss = skb_shinfo(skb)->gso_size;
 
@@ -198,11 +257,11 @@ static struct sk_buff *__udp_gso_segment_list(struct 
sk_buff *skb,
 
udp_hdr(skb)-&

RE: [RESEND PATCH net v4] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

2021-02-01 Thread Dongseok Yi
On 1/31/21 12:55 AM, Alexander Lobakin wrote:
> From: Dongseok Yi 
> Date: Sat, 30 Jan 2021 08:13:27 +0900
> 
> > +static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
> > +{
> > +   struct sk_buff *seg;
> > +   struct udphdr *uh, *uh2;
> > +   struct iphdr *iph, *iph2;
> > +
> > +   seg = segs;
> > +   uh = udp_hdr(seg);
> > +   iph = ip_hdr(seg);
> > +
> > +   if ((udp_hdr(seg)->dest == udp_hdr(seg->next)->dest) &&
> > +   (udp_hdr(seg)->source == udp_hdr(seg->next)->source) &&
> > +   (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) &&
> > +   (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr))
> > +   return segs;
> > +
> > +   while ((seg = seg->next)) {
> > +   uh2 = udp_hdr(seg);
> > +   iph2 = ip_hdr(seg);
> > +
> > +   __udpv4_gso_segment_csum(seg,
> > +&iph2->saddr, &iph->saddr,
> > +&uh2->source, &uh->source);
> > +   __udpv4_gso_segment_csum(seg,
> > +&iph2->daddr, &iph->daddr,
> > +&uh2->dest, &uh->dest);
> > +   }
> > +
> > +   return segs;
> > +}
> > +
> >  static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
> > - netdev_features_t features)
> > + netdev_features_t features,
> > + bool is_ipv6)
> >  {
> > unsigned int mss = skb_shinfo(skb)->gso_size;
> >
> > @@ -198,11 +257,11 @@ static struct sk_buff *__udp_gso_segment_list(struct 
> > sk_buff *skb,
> >
> > udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
> >
> > -   return skb;
> > +   return is_ipv6 ? skb : __udpv4_gso_segment_list_csum(skb);
> 
> I don't think it's okay to fix checksums only for IPv4.
> IPv6 checksum mangling doesn't depend on any code from net/ipv6. Just
> use inet_proto_csum_replace16() for v6 addresses (see nf_nat_proto.c
> for reference). You can guard the path for IPv6 with
> IS_ENABLED(CONFIG_IPV6) to optimize IPv4-only systems a bit.

As you can see in __udpv4_gso_segment_list_csum, we compare
ports and addrs. We should use *struct ipv6hdr* to compare the values
for IPv6 but I am not sure the struct could be under net/ipv4.

The initial idea was to support both IPv4 and IPv6. Thanks, that's a
good point. But the supporting IPv6 would be a new feature. I want to
fix IPv4 first, so the title is restricted to ipv4.

> 
> >  }
> >
> >  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
> > - netdev_features_t features)
> > + netdev_features_t features, bool is_ipv6)
> >  {
> > struct sock *sk = gso_skb->sk;
> > unsigned int sum_truesize = 0;
> > @@ -214,7 +273,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff 
> > *gso_skb,
> > __be16 newlen;
> >
> > if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
> > -   return __udp_gso_segment_list(gso_skb, features);
> > +   return __udp_gso_segment_list(gso_skb, features, is_ipv6);
> >
> > mss = skb_shinfo(gso_skb)->gso_size;
> > if (gso_skb->len <= sizeof(*uh) + mss)
> > @@ -328,7 +387,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff 
> > *skb,
> > goto out;
> >
> > if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
> > -   return __udp_gso_segment(skb, features);
> > +   return __udp_gso_segment(skb, features, false);
> >
> > mss = skb_shinfo(skb)->gso_size;
> > if (unlikely(skb->len <= mss))
> > diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
> > index c7bd7b1..faa823c 100644
> > --- a/net/ipv6/udp_offload.c
> > +++ b/net/ipv6/udp_offload.c
> > @@ -42,7 +42,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff 
> > *skb,
> > goto out;
> >
> > if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
> > -   return __udp_gso_segment(skb, features);
> > +   return __udp_gso_segment(skb, features, true);
> >
> > mss = skb_shinfo(skb)->gso_size;
> > if (unlikely(skb->len <= mss))
> > --
> > 2.7.4
> 
> Thanks,
> Al




RE: [RFC PATCH net] udp: check sk for UDP GRO fraglist

2021-01-10 Thread Dongseok Yi
On 2021-01-08 22:35, Steffen Klassert wrote:
> On Fri, Jan 08, 2021 at 09:52:28PM +0900, Dongseok Yi wrote:
> > It is a workaround patch.
> >
> > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > skb_gso_segment is updated but following frag_skbs are not updated.
> >
> > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > does not try to update any UDP/IP header of the segment list.
> >
> > It might make sense because each skb of frag_skbs is converted to a
> > list of regular packets. Header update with checksum calculation may
> > be not needed for UDP GROed frag_skbs.
> >
> > But UDP GRO frag_list is started from udp_gro_receive, we don't know
> > whether the skb will be NAT forwarded at that time. For workaround,
> > try to get sock always when call udp4_gro_receive -> udp_gro_receive
> > to check if the skb is for local.
> >
> > I'm still not sure if UDP GRO frag_list is really designed for local
> > session only. Can kernel support NAT forward for UDP GRO frag_list?
> > What am I missing?
> 
> The initial idea when I implemented this was to have a fast
> forwarding path for UDP. So forwarding is a usecase, but NAT
> is a problem, indeed. A quick fix could be to segment the
> skb before it gets NAT forwarded. Alternatively we could
> check for a header change in __udp_gso_segment_list and
> update the header of the frag_skbs accordingly in that case.

Thank you for explaining.
Can I think of it as a known issue? I think we should have a fix
because NAT can be triggered by user. Can I check the current status?
Already planning a patch or a new patch should be written?



RE: [PATCH net-next] udp: allow forwarding of plain (non-fraglisted) UDP GRO packets

2021-01-12 Thread Dongseok Yi
On 2021-01-13 12:10, Alexander Duyck wrote:
> On 1/12/21 1:16 PM, Alexander Lobakin wrote:
> > Commit 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") actually
> > not only added a support for fraglisted UDP GRO, but also tweaked
> > some logics the way that non-fraglisted UDP GRO started to work for
> > forwarding too.
> > Tests showed that currently forwarding and NATing of plain UDP GRO
> > packets are performed fully correctly, regardless if the target
> > netdevice has a support for hardware/driver GSO UDP L4 or not.
> > Add the last element and allow to form plain UDP GRO packets if
> > there is no socket -> we are on forwarding path.
> >

Your patch is very similar with the RFC what I submitted but has
different approach. My concern was NAT forwarding.
https://lore.kernel.org/patchwork/patch/1362257/

Nevertheless, I agreed with your idea that allow fraglisted UDP GRO
if there is socket.

> > Plain UDP GRO forwarding even shows better performance than fraglisted
> > UDP GRO in some cases due to not wasting one skbuff_head per every
> > segment.
> >
> > Signed-off-by: Alexander Lobakin 
> > ---
> >   net/ipv4/udp_offload.c | 5 +++--
> >   1 file changed, 3 insertions(+), 2 deletions(-)
> >
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index ff39e94781bf..9d71df3d52ce 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -460,12 +460,13 @@ struct sk_buff *udp_gro_receive(struct list_head 
> > *head, struct sk_buff *skb,
> > if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
> > NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;

is_flist can be true even if !sk.

> >
> > -   if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) {
> > +   if (!sk || (sk && udp_sk(sk)->gro_enabled) ||

Actually sk would be NULL by udp_encap_needed_key in udp4_gro_receive
or udp6_gro_receive.

> > +   NAPI_GRO_CB(skb)->is_flist) {
> > pp = call_gro_receive(udp_gro_receive_segment, head, skb);

udp_gro_receive_segment will check is_flist first and try to do
fraglisted UDP GRO. Can you check what I'm missing?

> > return pp;
> > }
> >
> 
> The second check for sk in "(sk && udp_sk(sk)->gro_enabled)" is
> redundant and can be dropped. You already verified it is present when
> you checked for !sk before the logical OR.
> 

Sorry, Alexander Duyck. I believe Alexander Lobakin will answer this.

> > -   if (!sk || NAPI_GRO_CB(skb)->encap_mark ||
> > +   if (NAPI_GRO_CB(skb)->encap_mark ||
> > (skb->ip_summed != CHECKSUM_PARTIAL &&
> >  NAPI_GRO_CB(skb)->csum_cnt == 0 &&
> >  !NAPI_GRO_CB(skb)->csum_valid) ||
> >




RE: [PATCH net v2] net: fix use-after-free when UDP GRO with shared fraglist

2021-01-07 Thread Dongseok Yi
On 2021-01-07 20:05, Daniel Borkmann wrote:
> 
> On 1/7/21 1:39 AM, Dongseok Yi wrote:
> > skbs in fraglist could be shared by a BPF filter loaded at TC. It
> > triggers skb_ensure_writable -> pskb_expand_head ->
> > skb_clone_fraglist -> skb_get on each skb in the fraglist.
> >
> > While tcpdump, sk_receive_queue of PF_PACKET has the original fraglist.
> > But the same fraglist is queued to PF_INET (or PF_INET6) as the fraglist
> > chain made by skb_segment_list.
> >
> > If the new skb (not fraglist) is queued to one of the sk_receive_queue,
> > multiple ptypes can see this. The skb could be released by ptypes and
> > it causes use-after-free.
> >
> > [ 4443.426215] [ cut here ]
> > [ 4443.426222] refcount_t: underflow; use-after-free.
> > [ 4443.426291] WARNING: CPU: 7 PID: 28161 at lib/refcount.c:190
> > refcount_dec_and_test_checked+0xa4/0xc8
> > [ 4443.426726] pstate: 6045 (nZCv daif +PAN -UAO)
> > [ 4443.426732] pc : refcount_dec_and_test_checked+0xa4/0xc8
> > [ 4443.426737] lr : refcount_dec_and_test_checked+0xa0/0xc8
> > [ 4443.426808] Call trace:
> > [ 4443.426813]  refcount_dec_and_test_checked+0xa4/0xc8
> > [ 4443.426823]  skb_release_data+0x144/0x264
> > [ 4443.426828]  kfree_skb+0x58/0xc4
> > [ 4443.426832]  skb_queue_purge+0x64/0x9c
> > [ 4443.426844]  packet_set_ring+0x5f0/0x820
> > [ 4443.426849]  packet_setsockopt+0x5a4/0xcd0
> > [ 4443.426853]  __sys_setsockopt+0x188/0x278
> > [ 4443.426858]  __arm64_sys_setsockopt+0x28/0x38
> > [ 4443.426869]  el0_svc_common+0xf0/0x1d0
> > [ 4443.426873]  el0_svc_handler+0x74/0x98
> > [ 4443.426880]  el0_svc+0x8/0xc
> >
> > Fixes: 3a1296a38d0c (net: Support GRO/GSO fraglist chaining.)
> > Signed-off-by: Dongseok Yi 
> > Acked-by: Willem de Bruijn 
> > ---
> >   net/core/skbuff.c | 20 +++-
> >   1 file changed, 19 insertions(+), 1 deletion(-)
> >
> > v2: Expand the commit message to clarify a BPF filter loaded
> >
> > diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> > index f62cae3..1dcbda8 100644
> > --- a/net/core/skbuff.c
> > +++ b/net/core/skbuff.c
> > @@ -3655,7 +3655,8 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
> > unsigned int delta_truesize = 0;
> > unsigned int delta_len = 0;
> > struct sk_buff *tail = NULL;
> > -   struct sk_buff *nskb;
> > +   struct sk_buff *nskb, *tmp;
> > +   int err;
> >
> > skb_push(skb, -skb_network_offset(skb) + offset);
> >
> > @@ -3665,11 +3666,28 @@ struct sk_buff *skb_segment_list(struct sk_buff 
> > *skb,
> > nskb = list_skb;
> > list_skb = list_skb->next;
> >
> > +   err = 0;
> > +   if (skb_shared(nskb)) {
> > +   tmp = skb_clone(nskb, GFP_ATOMIC);
> > +   if (tmp) {
> > +   kfree_skb(nskb);
> 
> Should use consume_skb() to not trigger skb:kfree_skb tracepoint when looking
> for drops in the stack.

I will use to consume_skb() on the next version.

> 
> > +   nskb = tmp;
> > +   err = skb_unclone(nskb, GFP_ATOMIC);
> 
> Could you elaborate why you also need to unclone? This looks odd here. tc 
> layer
> (independent of BPF) from ingress & egress side generally assumes unshared 
> skb,
> so above clone + dropping ref of nskb looks okay to make the main skb struct 
> private
> for mangling attributes (e.g. mark) & should suffice. What is the exact 
> purpose of
> the additional skb_unclone() in this context?

Willem de Bruijn said:
udp_rcv_segment later converts the udp-gro-list skb to a list of
regular packets to pass these one-by-one to udp_queue_rcv_one_skb.
Now all the frags are fully fledged packets, with headers pushed
before the payload.

PF_PACKET handles untouched fraglist. To modify the payload only
for udp_rcv_segment, skb_unclone is necessary.

> 
> > +   } else {
> > +   err = -ENOMEM;
> > +   }
> > +   }
> > +
> > if (!tail)
> > skb->next = nskb;
> > else
> > tail->next = nskb;
> >
> > +   if (unlikely(err)) {
> > +   nskb->next = list_skb;
> > +   goto err_linearize;
> > +   }
> > +
> > tail = nskb;
> >
> > delta_len += nskb->len;
> >




[PATCH net v3] net: fix use-after-free when UDP GRO with shared fraglist

2021-01-07 Thread Dongseok Yi
skbs in fraglist could be shared by a BPF filter loaded at TC. If TC
writes, it will call skb_ensure_writable -> pskb_expand_head to create
a private linear section for the head_skb. And then call
skb_clone_fraglist -> skb_get on each skb in the fraglist.

skb_segment_list overwrites part of the skb linear section of each
fragment itself. Even after skb_clone, the frag_skbs share their
linear section with their clone in PF_PACKET.

Both sk_receive_queue of PF_PACKET and PF_INET (or PF_INET6) can have
a link for the same frag_skbs chain. If a new skb (not frags) is
queued to one of the sk_receive_queue, multiple ptypes can see and
release this. It causes use-after-free.

[ 4443.426215] [ cut here ]
[ 4443.426222] refcount_t: underflow; use-after-free.
[ 4443.426291] WARNING: CPU: 7 PID: 28161 at lib/refcount.c:190
refcount_dec_and_test_checked+0xa4/0xc8
[ 4443.426726] pstate: 6045 (nZCv daif +PAN -UAO)
[ 4443.426732] pc : refcount_dec_and_test_checked+0xa4/0xc8
[ 4443.426737] lr : refcount_dec_and_test_checked+0xa0/0xc8
[ 4443.426808] Call trace:
[ 4443.426813]  refcount_dec_and_test_checked+0xa4/0xc8
[ 4443.426823]  skb_release_data+0x144/0x264
[ 4443.426828]  kfree_skb+0x58/0xc4
[ 4443.426832]  skb_queue_purge+0x64/0x9c
[ 4443.426844]  packet_set_ring+0x5f0/0x820
[ 4443.426849]  packet_setsockopt+0x5a4/0xcd0
[ 4443.426853]  __sys_setsockopt+0x188/0x278
[ 4443.426858]  __arm64_sys_setsockopt+0x28/0x38
[ 4443.426869]  el0_svc_common+0xf0/0x1d0
[ 4443.426873]  el0_svc_handler+0x74/0x98
[ 4443.426880]  el0_svc+0x8/0xc

Fixes: 3a1296a38d0c (net: Support GRO/GSO fraglist chaining.)
Signed-off-by: Dongseok Yi 
Acked-by: Willem de Bruijn 
---
 net/core/skbuff.c | 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

v2:
per Willem de Bruijn request,
expanded the commit message to clarify a BPF filter loaded.

v3:
per Daniel Borkmann request,
added further details from Willem in the commit message,
and use consume_skb instead of kfree_skb.

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f62cae3..b6f2b52 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3655,7 +3655,8 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
unsigned int delta_truesize = 0;
unsigned int delta_len = 0;
struct sk_buff *tail = NULL;
-   struct sk_buff *nskb;
+   struct sk_buff *nskb, *tmp;
+   int err;
 
skb_push(skb, -skb_network_offset(skb) + offset);
 
@@ -3665,11 +3666,28 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
nskb = list_skb;
list_skb = list_skb->next;
 
+   err = 0;
+   if (skb_shared(nskb)) {
+   tmp = skb_clone(nskb, GFP_ATOMIC);
+   if (tmp) {
+   consume_skb(nskb);
+   nskb = tmp;
+   err = skb_unclone(nskb, GFP_ATOMIC);
+   } else {
+   err = -ENOMEM;
+   }
+   }
+
if (!tail)
skb->next = nskb;
else
tail->next = nskb;
 
+   if (unlikely(err)) {
+   nskb->next = list_skb;
+   goto err_linearize;
+   }
+
tail = nskb;
 
delta_len += nskb->len;
-- 
2.7.4



[RFC PATCH net] udp: check sk for UDP GRO fraglist

2021-01-08 Thread Dongseok Yi
It is a workaround patch.

UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
forwarding. Only the header of head_skb from ip_finish_output_gso ->
skb_gso_segment is updated but following frag_skbs are not updated.

A call path skb_mac_gso_segment -> inet_gso_segment ->
udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
does not try to update any UDP/IP header of the segment list.

It might make sense because each skb of frag_skbs is converted to a
list of regular packets. Header update with checksum calculation may
be not needed for UDP GROed frag_skbs.

But UDP GRO frag_list is started from udp_gro_receive, we don't know
whether the skb will be NAT forwarded at that time. For workaround,
try to get sock always when call udp4_gro_receive -> udp_gro_receive
to check if the skb is for local.

I'm still not sure if UDP GRO frag_list is really designed for local
session only. Can kernel support NAT forward for UDP GRO frag_list?
What am I missing?

Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
Signed-off-by: Dongseok Yi 
---
 net/ipv4/udp_offload.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index ff39e94..d476216 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -457,7 +457,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, 
struct sk_buff *skb,
int flush = 1;
 
NAPI_GRO_CB(skb)->is_flist = 0;
-   if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
+   if (sk && (skb->dev->features & NETIF_F_GRO_FRAGLIST))
NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
 
if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) {
@@ -537,8 +537,7 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, 
struct sk_buff *skb)
NAPI_GRO_CB(skb)->is_ipv6 = 0;
rcu_read_lock();
 
-   if (static_branch_unlikely(&udp_encap_needed_key))
-   sk = udp4_gro_lookup_skb(skb, uh->source, uh->dest);
+   sk = udp4_gro_lookup_skb(skb, uh->source, uh->dest);
 
pp = udp_gro_receive(head, skb, uh, sk);
rcu_read_unlock();
-- 
2.7.4



[PATCH net] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

2021-01-14 Thread Dongseok Yi
UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
forwarding. Only the header of head_skb from ip_finish_output_gso ->
skb_gso_segment is updated but following frag_skbs are not updated.

A call path skb_mac_gso_segment -> inet_gso_segment ->
udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
does not try to update UDP/IP header of the segment list.

Update dport, daddr and checksums of each skb of the segment list
after __udp_gso_segment.

Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
Signed-off-by: Dongseok Yi 
---
Steffen Klassert said, there could be 2 options.
https://lore.kernel.org/patchwork/patch/1362257/

I was trying to write a quick fix, but it was not easy to forward
segmented list. Currently, assuming DNAT only. Should we consider
SNAT too?

 net/ipv4/udp_offload.c | 45 +
 1 file changed, 41 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index ff39e94..7e24928 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -309,10 +309,12 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff 
*skb,
 netdev_features_t features)
 {
struct sk_buff *segs = ERR_PTR(-EINVAL);
+   struct sk_buff *seg;
unsigned int mss;
__wsum csum;
-   struct udphdr *uh;
-   struct iphdr *iph;
+   struct udphdr *uh, *uh2;
+   struct iphdr *iph, *iph2;
+   bool is_fraglist = false;
 
if (skb->encapsulation &&
(skb_shinfo(skb)->gso_type &
@@ -327,8 +329,43 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff 
*skb,
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
 
-   if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
-   return __udp_gso_segment(skb, features);
+   if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+   if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+   is_fraglist = true;
+
+   segs = __udp_gso_segment(skb, features);
+   if (IS_ERR_OR_NULL(segs) || !is_fraglist)
+   return segs;
+
+   seg = segs;
+   uh = udp_hdr(seg);
+   iph = ip_hdr(seg);
+
+   while ((seg = seg->next)) {
+   uh2 = udp_hdr(seg);
+   iph2 = ip_hdr(seg);
+
+   if (uh->dest == uh2->dest && iph->daddr == iph2->daddr)
+   continue;
+
+   if (uh2->check) {
+   inet_proto_csum_replace4(&uh2->check, seg,
+iph2->daddr,
+iph->daddr, true);
+   inet_proto_csum_replace2(&uh2->check, seg,
+uh2->dest, uh->dest,
+false);
+   if (!uh2->check)
+   uh2->check = CSUM_MANGLED_0;
+   }
+   uh2->dest = uh->dest;
+
+   csum_replace4(&iph2->check, iph2->daddr, iph->daddr);
+   iph2->daddr = iph->daddr;
+   }
+
+   return segs;
+   }
 
mss = skb_shinfo(skb)->gso_size;
if (unlikely(skb->len <= mss))
-- 
2.7.4



RE: [PATCH net] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

2021-01-15 Thread Dongseok Yi
On 2021-01-15 17:12, Steffen Klassert wrote:
> On Fri, Jan 15, 2021 at 02:58:24PM +0900, Dongseok Yi wrote:
> > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > skb_gso_segment is updated but following frag_skbs are not updated.
> >
> > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > does not try to update UDP/IP header of the segment list.
> 
> We still need to find out why it works for Alexander, but not for you.
> Different usecases?

This patch is not for
https://lore.kernel.org/patchwork/patch/1364544/
Alexander might want to call udp_gro_receive_segment even when
!sk and ~NETIF_F_GRO_FRAGLIST.

> 
> We copy only the MAC header in skb_segment_list(), so I think
> this is a valid bug when NAT changed the UDP header.
> 
> >
> > Update dport, daddr and checksums of each skb of the segment list
> > after __udp_gso_segment.
> >
> > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > Signed-off-by: Dongseok Yi 
> > ---
> > Steffen Klassert said, there could be 2 options.
> > https://lore.kernel.org/patchwork/patch/1362257/
> >
> > I was trying to write a quick fix, but it was not easy to forward
> > segmented list. Currently, assuming DNAT only. Should we consider
> > SNAT too?
> 
> If it is broken, then it is broken for both, so yes.

Okay.

> 
> >
> >  net/ipv4/udp_offload.c | 45 +
> >  1 file changed, 41 insertions(+), 4 deletions(-)
> >
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index ff39e94..7e24928 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -309,10 +309,12 @@ static struct sk_buff *udp4_ufo_fragment(struct 
> > sk_buff *skb,
> >  netdev_features_t features)
> >  {
> > struct sk_buff *segs = ERR_PTR(-EINVAL);
> > +   struct sk_buff *seg;
> > unsigned int mss;
> > __wsum csum;
> > -   struct udphdr *uh;
> > -   struct iphdr *iph;
> > +   struct udphdr *uh, *uh2;
> > +   struct iphdr *iph, *iph2;
> > +   bool is_fraglist = false;
> >
> > if (skb->encapsulation &&
> > (skb_shinfo(skb)->gso_type &
> > @@ -327,8 +329,43 @@ static struct sk_buff *udp4_ufo_fragment(struct 
> > sk_buff *skb,
> > if (!pskb_may_pull(skb, sizeof(struct udphdr)))
> > goto out;
> >
> > -   if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
> > -   return __udp_gso_segment(skb, features);
> > +   if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
> > +   if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
> > +   is_fraglist = true;
> > +
> > +   segs = __udp_gso_segment(skb, features);
> > +   if (IS_ERR_OR_NULL(segs) || !is_fraglist)
> > +   return segs;
> > +
> > +   seg = segs;
> > +   uh = udp_hdr(seg);
> > +   iph = ip_hdr(seg);
> > +
> > +   while ((seg = seg->next)) {
> > +   uh2 = udp_hdr(seg);
> > +   iph2 = ip_hdr(seg);
> > +
> > +   if (uh->dest == uh2->dest && iph->daddr == iph2->daddr)
> > +   continue;
> > +
> > +   if (uh2->check) {
> > +   inet_proto_csum_replace4(&uh2->check, seg,
> > +iph2->daddr,
> > +iph->daddr, true);
> > +   inet_proto_csum_replace2(&uh2->check, seg,
> > +uh2->dest, uh->dest,
> > +false);
> > +   if (!uh2->check)
> > +   uh2->check = CSUM_MANGLED_0;
> > +   }
> > +   uh2->dest = uh->dest;
> > +
> > +   csum_replace4(&iph2->check, iph2->daddr, iph->daddr);
> > +   iph2->daddr = iph->daddr;
> > +   }
> > +
> > +   return segs;
> > +   }
> 
> I would not like to add this to a generic codepath. I think we can
> relatively easy copy the full headers in skb_segment_list().

RE: [PATCH net] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

2021-01-15 Thread Dongseok Yi
On 2021-01-15 19:51, Alexander Lobakin wrote:
> From: Steffen Klassert 
> Date: Fri, 15 Jan 2021 10:27:52 +0100
> 
> > On Fri, Jan 15, 2021 at 05:55:22PM +0900, Dongseok Yi wrote:
> >> On 2021-01-15 17:12, Steffen Klassert wrote:
> >>> On Fri, Jan 15, 2021 at 02:58:24PM +0900, Dongseok Yi wrote:
> >>>> UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> >>>> forwarding. Only the header of head_skb from ip_finish_output_gso ->
> >>>> skb_gso_segment is updated but following frag_skbs are not updated.
> >>>>
> >>>> A call path skb_mac_gso_segment -> inet_gso_segment ->
> >>>> udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> >>>> does not try to update UDP/IP header of the segment list.
> >>>
> >>> We still need to find out why it works for Alexander, but not for you.
> >>> Different usecases?
> >>
> >> This patch is not for
> >> https://lore.kernel.org/patchwork/patch/1364544/
> >> Alexander might want to call udp_gro_receive_segment even when
> >> !sk and ~NETIF_F_GRO_FRAGLIST.
> >
> > Yes, I know. But he said that fraglist GRO + NAT works for him.
> > I want to find out why it works for him, but not for you.
> 
> I found that it worked for me because I advertised fraglist GSO
> support in my driver (and added actual support for xmitting
> fraglists). If so, kernel won't resegment GSO into a list of
> plain packets, so no __udp_gso_segment_list() will be called.
> 
> I think it will break if I disable fraglist GSO feature through
> Ethtool, so I could test your patches.

Thanks for the reply. In my case I enabled NETIF_F_GRO_FRAGLIST on
my driver. It expected that NAT done on each skb of the forwarded
fraglist.

> 
> >>>
> >>> I would not like to add this to a generic codepath. I think we can
> >>> relatively easy copy the full headers in skb_segment_list().
> >>
> >> I tried to copy the full headers with the similar approach, but it
> >> copies length too. Can we keep the length of each skb of the fraglist?
> >
> > Ah yes, good point.
> >
> > Then maybe you can move your approach into __udp_gso_segment_list()
> > so that we dont touch generic code.
> >

Okay, I will move it into __udp_gso_segment_list() on v3.

> >>
> >>>
> >>> I think about something like the (completely untested) patch below:
> >>>
> >>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> >>> index f62cae3f75d8..63ae7f79fad7 100644
> >>> --- a/net/core/skbuff.c
> >>> +++ b/net/core/skbuff.c
> >>> @@ -3651,13 +3651,14 @@ struct sk_buff *skb_segment_list(struct sk_buff 
> >>> *skb,
> >>>unsigned int offset)
> >>>  {
> >>>   struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
> >>> + unsigned int doffset = skb->data - skb_mac_header(skb);
> >>>   unsigned int tnl_hlen = skb_tnl_header_len(skb);
> >>>   unsigned int delta_truesize = 0;
> >>>   unsigned int delta_len = 0;
> >>>   struct sk_buff *tail = NULL;
> >>>   struct sk_buff *nskb;
> >>>
> >>> - skb_push(skb, -skb_network_offset(skb) + offset);
> >>> + skb_push(skb, doffset);
> >>>
> >>>   skb_shinfo(skb)->frag_list = NULL;
> >>>
> >>> @@ -3675,7 +3676,7 @@ struct sk_buff *skb_segment_list(struct sk_buff 
> >>> *skb,
> >>>   delta_len += nskb->len;
> >>>   delta_truesize += nskb->truesize;
> >>>
> >>> - skb_push(nskb, -skb_network_offset(nskb) + offset);
> >>> + skb_push(nskb, doffset);
> >>>
> >>>   skb_release_head_state(nskb);
> >>>__copy_skb_header(nskb, skb);
> >>> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> >>> index ff39e94781bf..1181398378b8 100644
> >>> --- a/net/ipv4/udp_offload.c
> >>> +++ b/net/ipv4/udp_offload.c
> >>> @@ -190,9 +190,22 @@ EXPORT_SYMBOL(skb_udp_tunnel_segment);
> >>>  static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
> >>> netdev_features_t features)
> >>>  {
> >>> + struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
> >>>   unsigned int mss = skb_shinfo(skb)->gso_size;
> >>> + unsigned int offset;
> >>>
> &g

RE: [PATCH net] net: fix use-after-free when UDP GRO with shared fraglist

2021-04-18 Thread Dongseok Yi
On Sat, Apr 17, 2021 at 11:44:35AM +0800, Yunsheng Lin wrote:
> 
> On 2021/1/6 11:32, Dongseok Yi wrote:
> > On 2021-01-06 12:07, Willem de Bruijn wrote:
> >>
> >> On Tue, Jan 5, 2021 at 8:29 PM Dongseok Yi  wrote:
> >>>
> >>> On 2021-01-05 06:03, Willem de Bruijn wrote:
> >>>>
> >>>> On Mon, Jan 4, 2021 at 4:00 AM Dongseok Yi  wrote:
> >>>>>
> >>>>> skbs in frag_list could be shared by pskb_expand_head() from BPF.
> >>>>
> >>>> Can you elaborate on the BPF connection?
> >>>
> >>> With the following registered ptypes,
> >>>
> >>> /proc/net # cat ptype
> >>> Type Device  Function
> >>> ALL   tpacket_rcv
> >>> 0800  ip_rcv.cfi_jt
> >>> 0011  llc_rcv.cfi_jt
> >>> 0004  llc_rcv.cfi_jt
> >>> 0806  arp_rcv
> >>> 86dd  ipv6_rcv.cfi_jt
> >>>
> >>> BPF checks skb_ensure_writable between tpacket_rcv and ip_rcv
> >>> (or ipv6_rcv). And it calls pskb_expand_head.
> >>>
> >>> [  132.051228] pskb_expand_head+0x360/0x378
> >>> [  132.051237] skb_ensure_writable+0xa0/0xc4
> >>> [  132.051249] bpf_skb_pull_data+0x28/0x60
> >>> [  132.051262] bpf_prog_331d69c77ea5e964_schedcls_ingres+0x5f4/0x1000
> >>> [  132.051273] cls_bpf_classify+0x254/0x348
> >>> [  132.051284] tcf_classify+0xa4/0x180
> >>
> >> Ah, you have a BPF program loaded at TC. That was not entirely obvious.
> >>
> >> This program gets called after packet sockets with ptype_all, before
> >> those with a specific protocol.
> >>
> >> Tcpdump will have inserted a program with ptype_all, which cloned the
> >> skb. This triggers skb_ensure_writable -> pskb_expand_head ->
> >> skb_clone_fraglist -> skb_get.
> >>
> >>> [  132.051294] __netif_receive_skb_core+0x590/0xd28
> >>> [  132.051303] __netif_receive_skb+0x50/0x17c
> >>> [  132.051312] process_backlog+0x15c/0x1b8
> >>>
> >>>>
> >>>>> While tcpdump, sk_receive_queue of PF_PACKET has the original frag_list.
> >>>>> But the same frag_list is queued to PF_INET (or PF_INET6) as the 
> >>>>> fraglist
> >>>>> chain made by skb_segment_list().
> >>>>>
> >>>>> If the new skb (not frag_list) is queued to one of the sk_receive_queue,
> >>>>> multiple ptypes can see this. The skb could be released by ptypes and
> >>>>> it causes use-after-free.
> >>>>
> >>>> If I understand correctly, a udp-gro-list skb makes it up the receive
> >>>> path with one or more active packet sockets.
> >>>>
> >>>> The packet socket will call skb_clone after accepting the filter. This
> >>>> replaces the head_skb, but shares the skb_shinfo and thus frag_list.
> >>>>
> >>>> udp_rcv_segment later converts the udp-gro-list skb to a list of
> >>>> regular packets to pass these one-by-one to udp_queue_rcv_one_skb.
> >>>> Now all the frags are fully fledged packets, with headers pushed
> >>>> before the payload. This does not change their refcount anymore than
> >>>> the skb_clone in pf_packet did. This should be 1.
> >>>>
> >>>> Eventually udp_recvmsg will call skb_consume_udp on each packet.
> >>>>
> >>>> The packet socket eventually also frees its cloned head_skb, which 
> >>>> triggers
> >>>>
> >>>>   kfree_skb_list(shinfo->frag_list)
> >>>> kfree_skb
> >>>>   skb_unref
> >>>> refcount_dec_and_test(&skb->users)
> >>>
> >>> Every your understanding is right, but
> >>>
> >>>>
> >>>>>
> >>>>> [ 4443.426215] [ cut here ]
> >>>>> [ 4443.426222] refcount_t: underflow; use-after-free.
> >>>>> [ 4443.426291] WARNING: CPU: 7 PID: 28161 at lib/refcount.c:190
> >>>>> refcount_dec_and_test_checked+0xa4/0xc8
> >>>>> [ 4443.426726] pstate: 6045 (nZCv daif +PAN -UAO)
> >>>>> [ 4443.426732] pc : refcount_dec_and_test_checked+0xa4/0xc8
> >>>>> [ 4443.426737] lr : refcount_dec_and_test_checked+0xa0/0xc8
> >>>>> [ 444

RE: [PATCH net v2] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

2021-01-19 Thread Dongseok Yi
On 2021-01-18 22:27, Steffen Klassert wrote:
> On Fri, Jan 15, 2021 at 10:20:35PM +0900, Dongseok Yi wrote:
> > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > skb_gso_segment is updated but following frag_skbs are not updated.
> >
> > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > does not try to update UDP/IP header of the segment list but copy
> > only the MAC header.
> >
> > Update dport, daddr and checksums of each skb of the segment list
> > in __udp_gso_segment_list. It covers both SNAT and DNAT.
> >
> > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > Signed-off-by: Dongseok Yi 
> > ---
> > v1:
> > Steffen Klassert said, there could be 2 options.
> > https://lore.kernel.org/patchwork/patch/1362257/
> > I was trying to write a quick fix, but it was not easy to forward
> > segmented list. Currently, assuming DNAT only.
> >
> > v2:
> > Per Steffen Klassert request, move the procedure from
> > udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> >
> > To Alexander Lobakin, I've checked your email late. Just use this
> > patch as a reference. It support SNAT too, but does not support IPv6
> > yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> > to the file is in IPv4 directory.
> >
> >  include/net/udp.h  |  2 +-
> >  net/ipv4/udp_offload.c | 62 
> > ++
> >  net/ipv6/udp_offload.c |  2 +-
> >  3 files changed, 59 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/net/udp.h b/include/net/udp.h
> > index 877832b..01351ba 100644
> > --- a/include/net/udp.h
> > +++ b/include/net/udp.h
> > @@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, 
> > struct sk_buff *skb,
> >  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
> >
> >  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
> > - netdev_features_t features);
> > + netdev_features_t features, bool is_ipv6);
> >
> >  static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
> >  {
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index ff39e94..c532d3b 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -187,8 +187,57 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff 
> > *skb,
> >  }
> >  EXPORT_SYMBOL(skb_udp_tunnel_segment);
> >
> > +static void __udpv4_gso_segment_csum(struct sk_buff *seg,
> > +__be32 *oldip, __be32 *newip,
> > +__be16 *oldport, __be16 *newport)
> > +{
> > +   struct udphdr *uh = udp_hdr(seg);
> > +   struct iphdr *iph = ip_hdr(seg);
> > +
> > +   if (uh->check) {
> > +   inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip,
> > +true);
> > +   inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport,
> > +false);
> > +   if (!uh->check)
> > +   uh->check = CSUM_MANGLED_0;
> > +   }
> > +   uh->dest = *newport;
> > +
> > +   csum_replace4(&iph->check, *oldip, *newip);
> > +   iph->daddr = *newip;
> > +}
> 
> Can't we just do the checksum recalculation for this case as it is done
> with standard GRO?

If I understand standard GRO correctly, it calculates full checksum.
Should we adopt the same method to UDP GRO fraglist? I did not find
a simple method for the incremental checksum update.

> 
> > +
> > +static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
> > +{
> > +   struct sk_buff *seg;
> > +   struct udphdr *uh, *uh2;
> > +   struct iphdr *iph, *iph2;
> > +
> > +   seg = segs;
> > +   uh = udp_hdr(seg);
> > +   iph = ip_hdr(seg);
> > +
> > +   while ((seg = seg->next)) {
> > +   uh2 = udp_hdr(seg);
> > +   iph2 = ip_hdr(seg);
> > +
> > +   if (uh->source != uh2->source || iph->saddr != iph2->saddr)
> > +   __udpv4_gso_segment_csum(seg,
> > +&iph2->saddr, &iph->saddr,
> > +&

RE: [PATCH net v2] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

2021-01-21 Thread Dongseok Yi
On 2021-01-20 15:56, Dongseok Yi wrote:
> On 2021-01-18 22:27, Steffen Klassert wrote:
> > On Fri, Jan 15, 2021 at 10:20:35PM +0900, Dongseok Yi wrote:
> > > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > > skb_gso_segment is updated but following frag_skbs are not updated.
> > >
> > > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > > does not try to update UDP/IP header of the segment list but copy
> > > only the MAC header.
> > >
> > > Update dport, daddr and checksums of each skb of the segment list
> > > in __udp_gso_segment_list. It covers both SNAT and DNAT.
> > >
> > > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > > Signed-off-by: Dongseok Yi 
> > > ---
> > > v1:
> > > Steffen Klassert said, there could be 2 options.
> > > https://lore.kernel.org/patchwork/patch/1362257/
> > > I was trying to write a quick fix, but it was not easy to forward
> > > segmented list. Currently, assuming DNAT only.
> > >
> > > v2:
> > > Per Steffen Klassert request, move the procedure from
> > > udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> > >
> > > To Alexander Lobakin, I've checked your email late. Just use this
> > > patch as a reference. It support SNAT too, but does not support IPv6
> > > yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> > > to the file is in IPv4 directory.
> > >
> > >  include/net/udp.h  |  2 +-
> > >  net/ipv4/udp_offload.c | 62 
> > > ++
> > >  net/ipv6/udp_offload.c |  2 +-
> > >  3 files changed, 59 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/include/net/udp.h b/include/net/udp.h
> > > index 877832b..01351ba 100644
> > > --- a/include/net/udp.h
> > > +++ b/include/net/udp.h
> > > @@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head 
> > > *head, struct sk_buff *skb,
> > >  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t 
> > > lookup);
> > >
> > >  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
> > > -   netdev_features_t features);
> > > +   netdev_features_t features, bool is_ipv6);
> > >
> > >  static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
> > >  {
> > > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > > index ff39e94..c532d3b 100644
> > > --- a/net/ipv4/udp_offload.c
> > > +++ b/net/ipv4/udp_offload.c
> > > @@ -187,8 +187,57 @@ struct sk_buff *skb_udp_tunnel_segment(struct 
> > > sk_buff *skb,
> > >  }
> > >  EXPORT_SYMBOL(skb_udp_tunnel_segment);
> > >
> > > +static void __udpv4_gso_segment_csum(struct sk_buff *seg,
> > > +  __be32 *oldip, __be32 *newip,
> > > +  __be16 *oldport, __be16 *newport)
> > > +{
> > > + struct udphdr *uh = udp_hdr(seg);
> > > + struct iphdr *iph = ip_hdr(seg);
> > > +
> > > + if (uh->check) {
> > > + inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip,
> > > +  true);
> > > + inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport,
> > > +  false);
> > > + if (!uh->check)
> > > + uh->check = CSUM_MANGLED_0;
> > > + }
> > > + uh->dest = *newport;
> > > +
> > > + csum_replace4(&iph->check, *oldip, *newip);
> > > + iph->daddr = *newip;
> > > +}
> >
> > Can't we just do the checksum recalculation for this case as it is done
> > with standard GRO?
> 
> If I understand standard GRO correctly, it calculates full checksum.
> Should we adopt the same method to UDP GRO fraglist? I did not find
> a simple method for the incremental checksum update.
> 
> >
> > > +
> > > +static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff 
> > > *segs)
> > > +{
> > > + struct sk_buff *seg;
> > > + struct udphdr *uh, *uh2;
> > > + struct iphdr *iph, *iph2;
> > > +
> > > + seg = segs;
> 

RE: [PATCH net v2] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

2021-01-21 Thread Dongseok Yi
On 2021-01-21 21:28, Steffen Klassert wrote:
> On Wed, Jan 20, 2021 at 03:55:42PM +0900, Dongseok Yi wrote:
> > On 2021-01-18 22:27, Steffen Klassert wrote:
> > > On Fri, Jan 15, 2021 at 10:20:35PM +0900, Dongseok Yi wrote:
> > > > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > > > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > > > skb_gso_segment is updated but following frag_skbs are not updated.
> > > >
> > > > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > > > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > > > does not try to update UDP/IP header of the segment list but copy
> > > > only the MAC header.
> > > >
> > > > Update dport, daddr and checksums of each skb of the segment list
> > > > in __udp_gso_segment_list. It covers both SNAT and DNAT.
> > > >
> > > > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > > > Signed-off-by: Dongseok Yi 
> > > > ---
> > > > v1:
> > > > Steffen Klassert said, there could be 2 options.
> > > > https://lore.kernel.org/patchwork/patch/1362257/
> > > > I was trying to write a quick fix, but it was not easy to forward
> > > > segmented list. Currently, assuming DNAT only.
> > > >
> > > > v2:
> > > > Per Steffen Klassert request, move the procedure from
> > > > udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> > > >
> > > > To Alexander Lobakin, I've checked your email late. Just use this
> > > > patch as a reference. It support SNAT too, but does not support IPv6
> > > > yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> > > > to the file is in IPv4 directory.
> > > >
> > > >  include/net/udp.h  |  2 +-
> > > >  net/ipv4/udp_offload.c | 62 
> > > > ++
> > > >  net/ipv6/udp_offload.c |  2 +-
> > > >  3 files changed, 59 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/include/net/udp.h b/include/net/udp.h
> > > > index 877832b..01351ba 100644
> > > > --- a/include/net/udp.h
> > > > +++ b/include/net/udp.h
> > > > @@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head 
> > > > *head, struct sk_buff *skb,
> > > >  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t 
> > > > lookup);
> > > >
> > > >  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
> > > > - netdev_features_t features);
> > > > + netdev_features_t features, bool 
> > > > is_ipv6);
> > > >
> > > >  static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
> > > >  {
> > > > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > > > index ff39e94..c532d3b 100644
> > > > --- a/net/ipv4/udp_offload.c
> > > > +++ b/net/ipv4/udp_offload.c
> > > > @@ -187,8 +187,57 @@ struct sk_buff *skb_udp_tunnel_segment(struct 
> > > > sk_buff *skb,
> > > >  }
> > > >  EXPORT_SYMBOL(skb_udp_tunnel_segment);
> > > >
> > > > +static void __udpv4_gso_segment_csum(struct sk_buff *seg,
> > > > +__be32 *oldip, __be32 *newip,
> > > > +__be16 *oldport, __be16 *newport)
> > > > +{
> > > > +   struct udphdr *uh = udp_hdr(seg);
> > > > +   struct iphdr *iph = ip_hdr(seg);
> > > > +
> > > > +   if (uh->check) {
> > > > +   inet_proto_csum_replace4(&uh->check, seg, *oldip, 
> > > > *newip,
> > > > +true);
> > > > +   inet_proto_csum_replace2(&uh->check, seg, *oldport, 
> > > > *newport,
> > > > +false);
> > > > +   if (!uh->check)
> > > > +   uh->check = CSUM_MANGLED_0;
> > > > +   }
> > > > +   uh->dest = *newport;
> > > > +
> > > > +   csum_replace4(&iph->check, *oldip, *newip);
> > > > +   iph->daddr = *newip;
> > > >

[PATCH net v3] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

2021-01-21 Thread Dongseok Yi
UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
forwarding. Only the header of head_skb from ip_finish_output_gso ->
skb_gso_segment is updated but following frag_skbs are not updated.

A call path skb_mac_gso_segment -> inet_gso_segment ->
udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
does not try to update UDP/IP header of the segment list but copy
only the MAC header.

Update port, addr and check of each skb of the segment list in
__udp_gso_segment_list. It covers both SNAT and DNAT.

Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
Signed-off-by: Dongseok Yi 
---
v1:
Steffen Klassert said, there could be 2 options.
https://lore.kernel.org/patchwork/patch/1362257/
I was trying to write a quick fix, but it was not easy to forward
segmented list. Currently, assuming DNAT only.

v2:
Per Steffen Klassert request, moved the procedure from
udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.

v3:
Per Steffen Klassert request, applied fast return by comparing seg
and seg->next at the beginning of __udpv4_gso_segment_list_csum.

Fixed uh->dest = *newport and iph->daddr = *newip to
*oldport = *newport and *oldip = *newip.

 include/net/udp.h  |  2 +-
 net/ipv4/udp_offload.c | 72 ++
 net/ipv6/udp_offload.c |  2 +-
 3 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/include/net/udp.h b/include/net/udp.h
index 877832b..01351ba 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, 
struct sk_buff *skb,
 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
 
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
- netdev_features_t features);
+ netdev_features_t features, bool is_ipv6);
 
 static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
 {
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index ff39e94..43660cf 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -187,8 +187,67 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(skb_udp_tunnel_segment);
 
+static void __udpv4_gso_segment_csum(struct sk_buff *seg,
+__be32 *oldip, __be32 *newip,
+__be16 *oldport, __be16 *newport)
+{
+   struct udphdr *uh;
+   struct iphdr *iph;
+
+   if (*oldip == *newip && *oldport == *newport)
+   return;
+
+   uh = udp_hdr(seg);
+   iph = ip_hdr(seg);
+
+   if (uh->check) {
+   inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip,
+true);
+   inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport,
+false);
+   if (!uh->check)
+   uh->check = CSUM_MANGLED_0;
+   }
+   *oldport = *newport;
+
+   csum_replace4(&iph->check, *oldip, *newip);
+   *oldip = *newip;
+}
+
+static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
+{
+   struct sk_buff *seg;
+   struct udphdr *uh, *uh2;
+   struct iphdr *iph, *iph2;
+
+   seg = segs;
+   uh = udp_hdr(seg);
+   iph = ip_hdr(seg);
+
+   if ((udp_hdr(seg)->dest == udp_hdr(seg->next)->dest) &&
+   (udp_hdr(seg)->source == udp_hdr(seg->next)->source) &&
+   (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) &&
+   (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr))
+   return segs;
+
+   while ((seg = seg->next)) {
+   uh2 = udp_hdr(seg);
+   iph2 = ip_hdr(seg);
+
+   __udpv4_gso_segment_csum(seg,
+&iph2->saddr, &iph->saddr,
+&uh2->source, &uh->source);
+   __udpv4_gso_segment_csum(seg,
+&iph2->daddr, &iph->daddr,
+&uh2->dest, &uh->dest);
+   }
+
+   return segs;
+}
+
 static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
- netdev_features_t features)
+ netdev_features_t features,
+ bool is_ipv6)
 {
unsigned int mss = skb_shinfo(skb)->gso_size;
 
@@ -198,11 +257,14 @@ static struct sk_buff *__udp_gso_segment_list(struct 
sk_buff *skb,
 
udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
 
-   return skb;
+   if (is_ipv6)
+   return skb;
+   else
+   return __udpv4_gso_segment_list_csum(skb);
 }
 
 struct s

RE: [PATCH net v3] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

2021-01-26 Thread Dongseok Yi
On 1/25/21 9:45 PM, Steffen Klassert wrote:
> On Thu, Jan 21, 2021 at 10:24:39PM +0900, Dongseok Yi wrote:
> > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > skb_gso_segment is updated but following frag_skbs are not updated.
> >
> > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > does not try to update UDP/IP header of the segment list but copy
> > only the MAC header.
> >
> > Update port, addr and check of each skb of the segment list in
> > __udp_gso_segment_list. It covers both SNAT and DNAT.
> >
> > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > Signed-off-by: Dongseok Yi 
> > ---
> > v1:
> > Steffen Klassert said, there could be 2 options.
> > https://lore.kernel.org/patchwork/patch/1362257/
> > I was trying to write a quick fix, but it was not easy to forward
> > segmented list. Currently, assuming DNAT only.
> >
> > v2:
> > Per Steffen Klassert request, moved the procedure from
> > udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> >
> > v3:
> > Per Steffen Klassert request, applied fast return by comparing seg
> > and seg->next at the beginning of __udpv4_gso_segment_list_csum.
> >
> > Fixed uh->dest = *newport and iph->daddr = *newip to
> > *oldport = *newport and *oldip = *newip.
> >
> >  include/net/udp.h  |  2 +-
> >  net/ipv4/udp_offload.c | 72 
> > ++
> >  net/ipv6/udp_offload.c |  2 +-
> >  3 files changed, 69 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/net/udp.h b/include/net/udp.h
> > index 877832b..01351ba 100644
> > --- a/include/net/udp.h
> > +++ b/include/net/udp.h
> > @@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, 
> > struct sk_buff *skb,
> >  int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
> >
> >  struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
> > - netdev_features_t features);
> > + netdev_features_t features, bool is_ipv6);
> >
> >  static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
> >  {
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index ff39e94..43660cf 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -187,8 +187,67 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff 
> > *skb,
> >  }
> >  EXPORT_SYMBOL(skb_udp_tunnel_segment);
> >
> > +static void __udpv4_gso_segment_csum(struct sk_buff *seg,
> > +__be32 *oldip, __be32 *newip,
> > +__be16 *oldport, __be16 *newport)
> > +{
> > +   struct udphdr *uh;
> > +   struct iphdr *iph;
> > +
> > +   if (*oldip == *newip && *oldport == *newport)
> > +   return;
> 
> This check is redundant as you check this already in
> __udpv4_gso_segment_list_csum.

When comes in __udpv4_gso_segment_csum, the condition would be
SNAT or DNAT. I think we don't need to do the function if the
condition is not met. I want to skip the function for SNAT checksum
when DNAT only case. Is it better to remove the check?

> 
> Looks ok otherwise.
> 
> > +
> > +   uh = udp_hdr(seg);
> > +   iph = ip_hdr(seg);
> > +
> > +   if (uh->check) {
> > +   inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip,
> > +true);
> > +   inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport,
> > +false);
> > +   if (!uh->check)
> > +   uh->check = CSUM_MANGLED_0;
> > +   }
> > +   *oldport = *newport;
> > +
> > +   csum_replace4(&iph->check, *oldip, *newip);
> > +   *oldip = *newip;
> > +}
> > +
> > +static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
> > +{
> > +   struct sk_buff *seg;
> > +   struct udphdr *uh, *uh2;
> > +   struct iphdr *iph, *iph2;
> > +
> > +   seg = segs;
> > +   uh = udp_hdr(seg);
> > +   iph = ip_hdr(seg);
> > +
> > +   if ((udp_hdr(seg)->dest == udp_hdr(seg->next)->dest) &&
> > +   (udp_hdr(seg)->source == udp_hdr(seg->next)->source) &&
> > +   (ip_hdr(seg)

[PATCH net] net: fix use-after-free when UDP GRO with shared fraglist

2021-01-04 Thread Dongseok Yi
skbs in frag_list could be shared by pskb_expand_head() from BPF.

While tcpdump, sk_receive_queue of PF_PACKET has the original frag_list.
But the same frag_list is queued to PF_INET (or PF_INET6) as the fraglist
chain made by skb_segment_list().

If the new skb (not frag_list) is queued to one of the sk_receive_queue,
multiple ptypes can see this. The skb could be released by ptypes and
it causes use-after-free.

[ 4443.426215] [ cut here ]
[ 4443.426222] refcount_t: underflow; use-after-free.
[ 4443.426291] WARNING: CPU: 7 PID: 28161 at lib/refcount.c:190
refcount_dec_and_test_checked+0xa4/0xc8
[ 4443.426726] pstate: 6045 (nZCv daif +PAN -UAO)
[ 4443.426732] pc : refcount_dec_and_test_checked+0xa4/0xc8
[ 4443.426737] lr : refcount_dec_and_test_checked+0xa0/0xc8
[ 4443.426808] Call trace:
[ 4443.426813]  refcount_dec_and_test_checked+0xa4/0xc8
[ 4443.426823]  skb_release_data+0x144/0x264
[ 4443.426828]  kfree_skb+0x58/0xc4
[ 4443.426832]  skb_queue_purge+0x64/0x9c
[ 4443.426844]  packet_set_ring+0x5f0/0x820
[ 4443.426849]  packet_setsockopt+0x5a4/0xcd0
[ 4443.426853]  __sys_setsockopt+0x188/0x278
[ 4443.426858]  __arm64_sys_setsockopt+0x28/0x38
[ 4443.426869]  el0_svc_common+0xf0/0x1d0
[ 4443.426873]  el0_svc_handler+0x74/0x98
[ 4443.426880]  el0_svc+0x8/0xc

Fixes: 3a1296a38d0c (net: Support GRO/GSO fraglist chaining.)
Signed-off-by: Dongseok Yi 
---
 net/core/skbuff.c | 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f62cae3..1dcbda8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3655,7 +3655,8 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
unsigned int delta_truesize = 0;
unsigned int delta_len = 0;
struct sk_buff *tail = NULL;
-   struct sk_buff *nskb;
+   struct sk_buff *nskb, *tmp;
+   int err;
 
skb_push(skb, -skb_network_offset(skb) + offset);
 
@@ -3665,11 +3666,28 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
nskb = list_skb;
list_skb = list_skb->next;
 
+   err = 0;
+   if (skb_shared(nskb)) {
+   tmp = skb_clone(nskb, GFP_ATOMIC);
+   if (tmp) {
+   kfree_skb(nskb);
+   nskb = tmp;
+   err = skb_unclone(nskb, GFP_ATOMIC);
+   } else {
+   err = -ENOMEM;
+   }
+   }
+
if (!tail)
skb->next = nskb;
else
tail->next = nskb;
 
+   if (unlikely(err)) {
+   nskb->next = list_skb;
+   goto err_linearize;
+   }
+
tail = nskb;
 
delta_len += nskb->len;
-- 
2.7.4



[PATCH net v2] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

2021-01-15 Thread Dongseok Yi
UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
forwarding. Only the header of head_skb from ip_finish_output_gso ->
skb_gso_segment is updated but following frag_skbs are not updated.

A call path skb_mac_gso_segment -> inet_gso_segment ->
udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
does not try to update UDP/IP header of the segment list but copy
only the MAC header.

Update dport, daddr and checksums of each skb of the segment list
in __udp_gso_segment_list. It covers both SNAT and DNAT.

Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
Signed-off-by: Dongseok Yi 
---
v1:
Steffen Klassert said, there could be 2 options.
https://lore.kernel.org/patchwork/patch/1362257/
I was trying to write a quick fix, but it was not easy to forward
segmented list. Currently, assuming DNAT only.

v2:
Per Steffen Klassert request, move the procedure from
udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.

To Alexander Lobakin, I've checked your email late. Just use this
patch as a reference. It support SNAT too, but does not support IPv6
yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
to the file is in IPv4 directory.

 include/net/udp.h  |  2 +-
 net/ipv4/udp_offload.c | 62 ++
 net/ipv6/udp_offload.c |  2 +-
 3 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/include/net/udp.h b/include/net/udp.h
index 877832b..01351ba 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, 
struct sk_buff *skb,
 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
 
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
- netdev_features_t features);
+ netdev_features_t features, bool is_ipv6);
 
 static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
 {
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index ff39e94..c532d3b 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -187,8 +187,57 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(skb_udp_tunnel_segment);
 
+static void __udpv4_gso_segment_csum(struct sk_buff *seg,
+__be32 *oldip, __be32 *newip,
+__be16 *oldport, __be16 *newport)
+{
+   struct udphdr *uh = udp_hdr(seg);
+   struct iphdr *iph = ip_hdr(seg);
+
+   if (uh->check) {
+   inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip,
+true);
+   inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport,
+false);
+   if (!uh->check)
+   uh->check = CSUM_MANGLED_0;
+   }
+   uh->dest = *newport;
+
+   csum_replace4(&iph->check, *oldip, *newip);
+   iph->daddr = *newip;
+}
+
+static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
+{
+   struct sk_buff *seg;
+   struct udphdr *uh, *uh2;
+   struct iphdr *iph, *iph2;
+
+   seg = segs;
+   uh = udp_hdr(seg);
+   iph = ip_hdr(seg);
+
+   while ((seg = seg->next)) {
+   uh2 = udp_hdr(seg);
+   iph2 = ip_hdr(seg);
+
+   if (uh->source != uh2->source || iph->saddr != iph2->saddr)
+   __udpv4_gso_segment_csum(seg,
+&iph2->saddr, &iph->saddr,
+&uh2->source, &uh->source);
+
+   if (uh->dest != uh2->dest || iph->daddr != iph2->daddr)
+   __udpv4_gso_segment_csum(seg,
+&iph2->daddr, &iph->daddr,
+&uh2->dest, &uh->dest);
+   }
+
+   return segs;
+}
+
 static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
- netdev_features_t features)
+ netdev_features_t features, bool 
is_ipv6)
 {
unsigned int mss = skb_shinfo(skb)->gso_size;
 
@@ -198,11 +247,14 @@ static struct sk_buff *__udp_gso_segment_list(struct 
sk_buff *skb,
 
udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
 
-   return skb;
+   if (is_ipv6)
+   return skb;
+   else
+   return __udpv4_gso_segment_list_csum(skb);
 }
 
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
- netdev_features_t features)
+ netdev_features_t features, bool is_ipv6)
 {
struct sock *sk = gso_skb->sk;
unsigned int sum_

RE: [PATCH net v2] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

2021-01-17 Thread Dongseok Yi
On 2021-01-16 02:12, Alexander Lobakin wrote:
> From: Dongseok Yi 
> Date: Fri, 15 Jan 2021 22:20:35 +0900
> 
> > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > skb_gso_segment is updated but following frag_skbs are not updated.
> >
> > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > does not try to update UDP/IP header of the segment list but copy
> > only the MAC header.
> >
> > Update dport, daddr and checksums of each skb of the segment list
> > in __udp_gso_segment_list. It covers both SNAT and DNAT.
> >
> > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > Signed-off-by: Dongseok Yi 
> > ---
> > v1:
> > Steffen Klassert said, there could be 2 options.
> > https://lore.kernel.org/patchwork/patch/1362257/
> > I was trying to write a quick fix, but it was not easy to forward
> > segmented list. Currently, assuming DNAT only.
> >
> > v2:
> > Per Steffen Klassert request, move the procedure from
> > udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> >
> > To Alexander Lobakin, I've checked your email late. Just use this
> > patch as a reference. It support SNAT too, but does not support IPv6
> > yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> > to the file is in IPv4 directory.
> 
> I used another approach, tried to make fraglist GRO closer to plain
> in terms of checksummming, as it is confusing to me why GSO packet
> should have CHECKSUM_UNNECESSARY. Just let Netfilter do its mangling,
> and then use classic UDP GSO magic at the end of segmentation.
> I also see the idea of explicit comparing and editing of IP and UDP
> headers right in __udp_gso_segment_list() rather unacceptable.

If I understand UDP GRO fraglist correctly, it keeps the length of
each skb of the fraglist. But your approach might change the lengths
by gso_size. What if each skb of the fraglist had different lengths?

For CHECKSUM_UNNECESSARY, GROed head_skb might have an invalid
checksum. But finally, the fraglist will be segmented to queue to
sk_receive_queue with head_skb. We could pass the GROed head_skb with
CHECKSUM_UNNECESSARY.

> 
> Dongseok, Steffen, please test this WIP diff and tell if this one
> works for you, so I could clean up the code and make a patch.
> For me, it works now in any configurations, with and without
> checksum/GSO/fraglist offload.
> 
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index c1a6f262636a..646a42e88e83 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -3674,6 +3674,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
>unsigned int offset)
>  {
>   struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
> + unsigned int doffset = skb->data - skb_mac_header(skb);
>   unsigned int tnl_hlen = skb_tnl_header_len(skb);
>   unsigned int delta_truesize = 0;
>   unsigned int delta_len = 0;
> @@ -3681,7 +3682,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
>   struct sk_buff *nskb, *tmp;
>   int err;
> 
> - skb_push(skb, -skb_network_offset(skb) + offset);
> + skb_push(skb, doffset);
> 
>   skb_shinfo(skb)->frag_list = NULL;
> 
> @@ -3716,12 +3717,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
>   delta_len += nskb->len;
>   delta_truesize += nskb->truesize;
> 
> - skb_push(nskb, -skb_network_offset(nskb) + offset);
> + skb_push(nskb, skb_headroom(nskb) - skb_headroom(skb));
> 
>   skb_release_head_state(nskb);
> -  __copy_skb_header(nskb, skb);
> + __copy_skb_header(nskb, skb);
> 
> - skb_headers_offset_update(nskb, skb_headroom(nskb) - 
> skb_headroom(skb));
>   skb_copy_from_linear_data_offset(skb, -tnl_hlen,
>nskb->data - tnl_hlen,
>offset + tnl_hlen);
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index ff39e94781bf..61665fcd8c85 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -190,13 +190,58 @@ EXPORT_SYMBOL(skb_udp_tunnel_segment);
>  static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
> netdev_features_t features)
>  {
> - unsigned int mss = skb_shinfo(skb)->gso_size;
> + struct sk_buff *seg;
> + struct udphdr *uh;
> + 

RE: [PATCH net v2] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

2021-01-17 Thread Dongseok Yi
On 2021-01-18 15:37, Steffen Klassert wrote:
> On Fri, Jan 15, 2021 at 05:12:33PM +, Alexander Lobakin wrote:
> > From: Dongseok Yi 
> > Date: Fri, 15 Jan 2021 22:20:35 +0900
> >
> > > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > > skb_gso_segment is updated but following frag_skbs are not updated.
> > >
> > > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > > does not try to update UDP/IP header of the segment list but copy
> > > only the MAC header.
> > >
> > > Update dport, daddr and checksums of each skb of the segment list
> > > in __udp_gso_segment_list. It covers both SNAT and DNAT.
> > >
> > > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > > Signed-off-by: Dongseok Yi 
> > > ---
> > > v1:
> > > Steffen Klassert said, there could be 2 options.
> > > https://lore.kernel.org/patchwork/patch/1362257/
> > > I was trying to write a quick fix, but it was not easy to forward
> > > segmented list. Currently, assuming DNAT only.
> > >
> > > v2:
> > > Per Steffen Klassert request, move the procedure from
> > > udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> > >
> > > To Alexander Lobakin, I've checked your email late. Just use this
> > > patch as a reference. It support SNAT too, but does not support IPv6
> > > yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> > > to the file is in IPv4 directory.
> >
> > I used another approach, tried to make fraglist GRO closer to plain
> > in terms of checksummming, as it is confusing to me why GSO packet
> > should have CHECKSUM_UNNECESSARY.
> 
> This is intentional. With fraglist GRO, we don't mangle packets
> in the standard (non NAT) case. So the checksum is still correct
> after segmentation. That is one reason why it has good forwarding
> performance when software segmentation is needed. Checksuming
> touches the whole packet and has a lot of overhead, so it is
> heplfull to avoid it whenever possible.
> 
> We should find a way to do the checksum only when we really
> need it. I.e. only if the headers of the head skb changed.

It would be not easy to detect if the skb is mangled by netfilter. I
think v2 patch has little impact on the performance. Can you suggest
an another version? If not, I can make v3 including 80 columns
warning fix.



RE: [PATCH net] net: fix use-after-free when UDP GRO with shared fraglist

2021-01-05 Thread Dongseok Yi
On 2021-01-05 06:03, Willem de Bruijn wrote:
> 
> On Mon, Jan 4, 2021 at 4:00 AM Dongseok Yi  wrote:
> >
> > skbs in frag_list could be shared by pskb_expand_head() from BPF.
> 
> Can you elaborate on the BPF connection?

With the following registered ptypes,

/proc/net # cat ptype
Type Device  Function
ALL   tpacket_rcv
0800  ip_rcv.cfi_jt
0011  llc_rcv.cfi_jt
0004  llc_rcv.cfi_jt
0806  arp_rcv
86dd  ipv6_rcv.cfi_jt

BPF checks skb_ensure_writable between tpacket_rcv and ip_rcv
(or ipv6_rcv). And it calls pskb_expand_head.

[  132.051228] pskb_expand_head+0x360/0x378
[  132.051237] skb_ensure_writable+0xa0/0xc4
[  132.051249] bpf_skb_pull_data+0x28/0x60
[  132.051262] bpf_prog_331d69c77ea5e964_schedcls_ingres+0x5f4/0x1000
[  132.051273] cls_bpf_classify+0x254/0x348
[  132.051284] tcf_classify+0xa4/0x180
[  132.051294] __netif_receive_skb_core+0x590/0xd28
[  132.051303] __netif_receive_skb+0x50/0x17c
[  132.051312] process_backlog+0x15c/0x1b8

> 
> > While tcpdump, sk_receive_queue of PF_PACKET has the original frag_list.
> > But the same frag_list is queued to PF_INET (or PF_INET6) as the fraglist
> > chain made by skb_segment_list().
> >
> > If the new skb (not frag_list) is queued to one of the sk_receive_queue,
> > multiple ptypes can see this. The skb could be released by ptypes and
> > it causes use-after-free.
> 
> If I understand correctly, a udp-gro-list skb makes it up the receive
> path with one or more active packet sockets.
> 
> The packet socket will call skb_clone after accepting the filter. This
> replaces the head_skb, but shares the skb_shinfo and thus frag_list.
> 
> udp_rcv_segment later converts the udp-gro-list skb to a list of
> regular packets to pass these one-by-one to udp_queue_rcv_one_skb.
> Now all the frags are fully fledged packets, with headers pushed
> before the payload. This does not change their refcount anymore than
> the skb_clone in pf_packet did. This should be 1.
> 
> Eventually udp_recvmsg will call skb_consume_udp on each packet.
> 
> The packet socket eventually also frees its cloned head_skb, which triggers
> 
>   kfree_skb_list(shinfo->frag_list)
> kfree_skb
>   skb_unref
> refcount_dec_and_test(&skb->users)

Every your understanding is right, but

> 
> >
> > [ 4443.426215] [ cut here ]
> > [ 4443.426222] refcount_t: underflow; use-after-free.
> > [ 4443.426291] WARNING: CPU: 7 PID: 28161 at lib/refcount.c:190
> > refcount_dec_and_test_checked+0xa4/0xc8
> > [ 4443.426726] pstate: 6045 (nZCv daif +PAN -UAO)
> > [ 4443.426732] pc : refcount_dec_and_test_checked+0xa4/0xc8
> > [ 4443.426737] lr : refcount_dec_and_test_checked+0xa0/0xc8
> > [ 4443.426808] Call trace:
> > [ 4443.426813]  refcount_dec_and_test_checked+0xa4/0xc8
> > [ 4443.426823]  skb_release_data+0x144/0x264
> > [ 4443.426828]  kfree_skb+0x58/0xc4
> > [ 4443.426832]  skb_queue_purge+0x64/0x9c
> > [ 4443.426844]  packet_set_ring+0x5f0/0x820
> > [ 4443.426849]  packet_setsockopt+0x5a4/0xcd0
> > [ 4443.426853]  __sys_setsockopt+0x188/0x278
> > [ 4443.426858]  __arm64_sys_setsockopt+0x28/0x38
> > [ 4443.426869]  el0_svc_common+0xf0/0x1d0
> > [ 4443.426873]  el0_svc_handler+0x74/0x98
> > [ 4443.426880]  el0_svc+0x8/0xc
> >
> > Fixes: 3a1296a38d0c (net: Support GRO/GSO fraglist chaining.)
> > Signed-off-by: Dongseok Yi 
> > ---
> >  net/core/skbuff.c | 20 +++-
> >  1 file changed, 19 insertions(+), 1 deletion(-)
> >
> > diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> > index f62cae3..1dcbda8 100644
> > --- a/net/core/skbuff.c
> > +++ b/net/core/skbuff.c
> > @@ -3655,7 +3655,8 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
> > unsigned int delta_truesize = 0;
> > unsigned int delta_len = 0;
> > struct sk_buff *tail = NULL;
> > -   struct sk_buff *nskb;
> > +   struct sk_buff *nskb, *tmp;
> > +   int err;
> >
> > skb_push(skb, -skb_network_offset(skb) + offset);
> >
> > @@ -3665,11 +3666,28 @@ struct sk_buff *skb_segment_list(struct sk_buff 
> > *skb,
> > nskb = list_skb;
> > list_skb = list_skb->next;
> >
> > +   err = 0;
> > +   if (skb_shared(nskb)) {
> 
> I must be missing something still. This does not square with my
> understanding that the two sockets are operating on clones, with each
> frag_list skb having skb->users == 1.
> 
> Unless the packet socket patch previously also triggered an
> skb_unclone/pskb_expand_head, as

RE: [PATCH net] net: fix use-after-free when UDP GRO with shared fraglist

2021-01-05 Thread Dongseok Yi
On 2021-01-06 12:07, Willem de Bruijn wrote:
> 
> On Tue, Jan 5, 2021 at 8:29 PM Dongseok Yi  wrote:
> >
> > On 2021-01-05 06:03, Willem de Bruijn wrote:
> > >
> > > On Mon, Jan 4, 2021 at 4:00 AM Dongseok Yi  wrote:
> > > >
> > > > skbs in frag_list could be shared by pskb_expand_head() from BPF.
> > >
> > > Can you elaborate on the BPF connection?
> >
> > With the following registered ptypes,
> >
> > /proc/net # cat ptype
> > Type Device  Function
> > ALL   tpacket_rcv
> > 0800  ip_rcv.cfi_jt
> > 0011  llc_rcv.cfi_jt
> > 0004  llc_rcv.cfi_jt
> > 0806  arp_rcv
> > 86dd  ipv6_rcv.cfi_jt
> >
> > BPF checks skb_ensure_writable between tpacket_rcv and ip_rcv
> > (or ipv6_rcv). And it calls pskb_expand_head.
> >
> > [  132.051228] pskb_expand_head+0x360/0x378
> > [  132.051237] skb_ensure_writable+0xa0/0xc4
> > [  132.051249] bpf_skb_pull_data+0x28/0x60
> > [  132.051262] bpf_prog_331d69c77ea5e964_schedcls_ingres+0x5f4/0x1000
> > [  132.051273] cls_bpf_classify+0x254/0x348
> > [  132.051284] tcf_classify+0xa4/0x180
> 
> Ah, you have a BPF program loaded at TC. That was not entirely obvious.
> 
> This program gets called after packet sockets with ptype_all, before
> those with a specific protocol.
> 
> Tcpdump will have inserted a program with ptype_all, which cloned the
> skb. This triggers skb_ensure_writable -> pskb_expand_head ->
> skb_clone_fraglist -> skb_get.
> 
> > [  132.051294] __netif_receive_skb_core+0x590/0xd28
> > [  132.051303] __netif_receive_skb+0x50/0x17c
> > [  132.051312] process_backlog+0x15c/0x1b8
> >
> > >
> > > > While tcpdump, sk_receive_queue of PF_PACKET has the original frag_list.
> > > > But the same frag_list is queued to PF_INET (or PF_INET6) as the 
> > > > fraglist
> > > > chain made by skb_segment_list().
> > > >
> > > > If the new skb (not frag_list) is queued to one of the sk_receive_queue,
> > > > multiple ptypes can see this. The skb could be released by ptypes and
> > > > it causes use-after-free.
> > >
> > > If I understand correctly, a udp-gro-list skb makes it up the receive
> > > path with one or more active packet sockets.
> > >
> > > The packet socket will call skb_clone after accepting the filter. This
> > > replaces the head_skb, but shares the skb_shinfo and thus frag_list.
> > >
> > > udp_rcv_segment later converts the udp-gro-list skb to a list of
> > > regular packets to pass these one-by-one to udp_queue_rcv_one_skb.
> > > Now all the frags are fully fledged packets, with headers pushed
> > > before the payload. This does not change their refcount anymore than
> > > the skb_clone in pf_packet did. This should be 1.
> > >
> > > Eventually udp_recvmsg will call skb_consume_udp on each packet.
> > >
> > > The packet socket eventually also frees its cloned head_skb, which 
> > > triggers
> > >
> > >   kfree_skb_list(shinfo->frag_list)
> > > kfree_skb
> > >   skb_unref
> > > refcount_dec_and_test(&skb->users)
> >
> > Every your understanding is right, but
> >
> > >
> > > >
> > > > [ 4443.426215] [ cut here ]
> > > > [ 4443.426222] refcount_t: underflow; use-after-free.
> > > > [ 4443.426291] WARNING: CPU: 7 PID: 28161 at lib/refcount.c:190
> > > > refcount_dec_and_test_checked+0xa4/0xc8
> > > > [ 4443.426726] pstate: 6045 (nZCv daif +PAN -UAO)
> > > > [ 4443.426732] pc : refcount_dec_and_test_checked+0xa4/0xc8
> > > > [ 4443.426737] lr : refcount_dec_and_test_checked+0xa0/0xc8
> > > > [ 4443.426808] Call trace:
> > > > [ 4443.426813]  refcount_dec_and_test_checked+0xa4/0xc8
> > > > [ 4443.426823]  skb_release_data+0x144/0x264
> > > > [ 4443.426828]  kfree_skb+0x58/0xc4
> > > > [ 4443.426832]  skb_queue_purge+0x64/0x9c
> > > > [ 4443.426844]  packet_set_ring+0x5f0/0x820
> > > > [ 4443.426849]  packet_setsockopt+0x5a4/0xcd0
> > > > [ 4443.426853]  __sys_setsockopt+0x188/0x278
> > > > [ 4443.426858]  __arm64_sys_setsockopt+0x28/0x38
> > > > [ 4443.426869]  el0_svc_common+0xf0/0x1d0
> > > > [ 4443.426873]  el0_svc_handler+0x74/0x98
> > > > [ 4443.426880]  el0_svc+0x8/0xc
> > > >
> > > > Fixes: 3a1296a38d0c (net: Suppo

[PATCH net v2] net: fix use-after-free when UDP GRO with shared fraglist

2021-01-06 Thread Dongseok Yi
skbs in fraglist could be shared by a BPF filter loaded at TC. It
triggers skb_ensure_writable -> pskb_expand_head ->
skb_clone_fraglist -> skb_get on each skb in the fraglist.

While tcpdump, sk_receive_queue of PF_PACKET has the original fraglist.
But the same fraglist is queued to PF_INET (or PF_INET6) as the fraglist
chain made by skb_segment_list.

If the new skb (not fraglist) is queued to one of the sk_receive_queue,
multiple ptypes can see this. The skb could be released by ptypes and
it causes use-after-free.

[ 4443.426215] [ cut here ]
[ 4443.426222] refcount_t: underflow; use-after-free.
[ 4443.426291] WARNING: CPU: 7 PID: 28161 at lib/refcount.c:190
refcount_dec_and_test_checked+0xa4/0xc8
[ 4443.426726] pstate: 6045 (nZCv daif +PAN -UAO)
[ 4443.426732] pc : refcount_dec_and_test_checked+0xa4/0xc8
[ 4443.426737] lr : refcount_dec_and_test_checked+0xa0/0xc8
[ 4443.426808] Call trace:
[ 4443.426813]  refcount_dec_and_test_checked+0xa4/0xc8
[ 4443.426823]  skb_release_data+0x144/0x264
[ 4443.426828]  kfree_skb+0x58/0xc4
[ 4443.426832]  skb_queue_purge+0x64/0x9c
[ 4443.426844]  packet_set_ring+0x5f0/0x820
[ 4443.426849]  packet_setsockopt+0x5a4/0xcd0
[ 4443.426853]  __sys_setsockopt+0x188/0x278
[ 4443.426858]  __arm64_sys_setsockopt+0x28/0x38
[ 4443.426869]  el0_svc_common+0xf0/0x1d0
[ 4443.426873]  el0_svc_handler+0x74/0x98
[ 4443.426880]  el0_svc+0x8/0xc

Fixes: 3a1296a38d0c (net: Support GRO/GSO fraglist chaining.)
Signed-off-by: Dongseok Yi 
Acked-by: Willem de Bruijn 
---
 net/core/skbuff.c | 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

v2: Expand the commit message to clarify a BPF filter loaded

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f62cae3..1dcbda8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3655,7 +3655,8 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
unsigned int delta_truesize = 0;
unsigned int delta_len = 0;
struct sk_buff *tail = NULL;
-   struct sk_buff *nskb;
+   struct sk_buff *nskb, *tmp;
+   int err;
 
skb_push(skb, -skb_network_offset(skb) + offset);
 
@@ -3665,11 +3666,28 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
nskb = list_skb;
list_skb = list_skb->next;
 
+   err = 0;
+   if (skb_shared(nskb)) {
+   tmp = skb_clone(nskb, GFP_ATOMIC);
+   if (tmp) {
+   kfree_skb(nskb);
+   nskb = tmp;
+   err = skb_unclone(nskb, GFP_ATOMIC);
+   } else {
+   err = -ENOMEM;
+   }
+   }
+
if (!tail)
skb->next = nskb;
else
tail->next = nskb;
 
+   if (unlikely(err)) {
+   nskb->next = list_skb;
+   goto err_linearize;
+   }
+
tail = nskb;
 
delta_len += nskb->len;
-- 
2.7.4