On Tue, 2016-11-15 at 12:51 -0800, Eric Dumazet wrote: > From: Eric Dumazet <eduma...@google.com> > > In commit 2331ccc5b323 ("tcp: enhance tcp collapsing"), > we made a first step allowing copying right skb to left skb head. > > Since all skbs in socket write queue are headless (but possibly the very > first one), this strategy often does not work. > > This patch extends tcp_collapse_retrans() to perform frag shifting, > thanks to skb_shift() helper. > > This helper needs to not BUG on non headless skbs, as callers are ok > with that. > > Tested: > > Following packetdrill test now passes : > > 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 > +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 > +0 bind(3, ..., ...) = 0 > +0 listen(3, 1) = 0 > > +0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 8> > +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> > +.100 < . 1:1(0) ack 1 win 257 > +0 accept(3, ..., ...) = 4 > > +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 > +0 write(4, ..., 200) = 200 > +0 > P. 1:201(200) ack 1 > +.001 write(4, ..., 200) = 200 > +0 > P. 201:401(200) ack 1 > +.001 write(4, ..., 200) = 200 > +0 > P. 401:601(200) ack 1 > +.001 write(4, ..., 200) = 200 > +0 > P. 601:801(200) ack 1 > +.001 write(4, ..., 200) = 200 > +0 > P. 801:1001(200) ack 1 > +.001 write(4, ..., 100) = 100 > +0 > P. 1001:1101(100) ack 1 > +.001 write(4, ..., 100) = 100 > +0 > P. 1101:1201(100) ack 1 > +.001 write(4, ..., 100) = 100 > +0 > P. 1201:1301(100) ack 1 > +.001 write(4, ..., 100) = 100 > +0 > P. 1301:1401(100) ack 1 > > +.099 < . 1:1(0) ack 201 win 257 > +.001 < . 1:1(0) ack 201 win 257 <nop,nop,sack 1001:1401> > +0 > P. 
201:1001(800) ack 1 > > Signed-off-by: Eric Dumazet <eduma...@google.com> > Cc: Neal Cardwell <ncardw...@google.com> > Cc: Yuchung Cheng <ych...@google.com> > --- > net/core/skbuff.c | 4 +++- > net/ipv4/tcp_output.c | 22 +++++++++++----------- > 2 files changed, 14 insertions(+), 12 deletions(-) > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > index > 0b2a6e94af2de73ed638634c47a0fb71e2cbc1cb..a9cb81a10c4ba895587727aa4cf098e9a38424ea > 100644 > --- a/net/core/skbuff.c > +++ b/net/core/skbuff.c > @@ -2656,7 +2656,9 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, > int shiftlen) > struct skb_frag_struct *fragfrom, *fragto; > > BUG_ON(shiftlen > skb->len); > - BUG_ON(skb_headlen(skb)); /* Would corrupt stream */ > + > + if (skb_headlen(skb)) > + return 0; > > todo = shiftlen; > from = 0; > diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c > index > f57b5aa51b59cf0a58975fe34a7dcdb886ea8c50..19105b46a30436ebb85fe97ee43089e77aa028bb > 100644 > --- a/net/ipv4/tcp_output.c > +++ b/net/ipv4/tcp_output.c > @@ -2514,7 +2514,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb, > } > > /* Collapses two adjacent SKB's during retransmission. 
*/ > -static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) > +static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) > { > struct tcp_sock *tp = tcp_sk(sk); > struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); > @@ -2525,14 +2525,17 @@ static void tcp_collapse_retrans(struct sock *sk, > struct sk_buff *skb) > > BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); > > + if (next_skb_size) { > + if (next_skb_size <= skb_availroom(skb)) > + skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size), > + next_skb_size); > + else if (!skb_shift(skb, next_skb, next_skb_size)) > + return false; > + } > tcp_highest_sack_combine(sk, next_skb, skb); > > tcp_unlink_write_queue(next_skb, sk); > > - if (next_skb_size) > - skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size), > - next_skb_size); > - > if (next_skb->ip_summed == CHECKSUM_PARTIAL) > skb->ip_summed = CHECKSUM_PARTIAL; > > @@ -2561,6 +2564,7 @@ static void tcp_collapse_retrans(struct sock *sk, > struct sk_buff *skb) > tcp_skb_collapse_tstamp(skb, next_skb); > > sk_wmem_free_skb(sk, next_skb); > + return true; > } > > /* Check if coalescing SKBs is legal. */ > @@ -2610,16 +2614,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, > struct sk_buff *to, > > if (space < 0) > break; > - /* Punt if not enough space exists in the first SKB for > - * the data in the second > - */ > - if (skb->len > skb_availroom(to)) > - break; > > if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) > break; > > - tcp_collapse_retrans(sk, to); > + if (!tcp_collapse_retrans(sk, to)) > + break; > } > } >
David, this patch is marked 'Superseded' in https://patchwork.ozlabs.org/patch/695264/ and I am not sure what that means exactly. Did I miss a mail, some feedback, or something else? Thanks!