While testing my inet defrag changes, I found that senders could spend ~20% of CPU cycles in skb_set_owner_w(), updating sk->sk_wmem_alloc for every fragment they cook.
The solution to this problem is to use alloc_skb() instead of sock_wmalloc() and manually perform a single sk_wmem_alloc change. A similar change for IPv6 is provided in the following patch. Signed-off-by: Eric Dumazet <eduma...@google.com> --- net/ipv4/ip_output.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 66340ab750e69ff5775f7996192839a24ddc6e65..94cacae76aca41e6e7feb7575c7999a414145c49 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -876,6 +876,7 @@ static int __ip_append_data(struct sock *sk, unsigned int maxfraglen, fragheaderlen, maxnonfragsize; int csummode = CHECKSUM_NONE; struct rtable *rt = (struct rtable *)cork->dst; + unsigned int wmem_alloc_delta = 0; u32 tskey = 0; skb = skb_peek_tail(queue); @@ -971,11 +972,10 @@ static int __ip_append_data(struct sock *sk, (flags & MSG_DONTWAIT), &err); } else { skb = NULL; - if (refcount_read(&sk->sk_wmem_alloc) <= + if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= 2 * sk->sk_sndbuf) - skb = sock_wmalloc(sk, - alloclen + hh_len + 15, 1, - sk->sk_allocation); + skb = alloc_skb(alloclen + hh_len + 15, + sk->sk_allocation); if (unlikely(!skb)) err = -ENOBUFS; } @@ -1033,6 +1033,11 @@ static int __ip_append_data(struct sock *sk, /* * Put the packet on the pending queue. 
*/ + if (!skb->destructor) { + skb->destructor = sock_wfree; + skb->sk = sk; + wmem_alloc_delta += skb->truesize; + } __skb_queue_tail(queue, skb); continue; } @@ -1079,12 +1084,13 @@ static int __ip_append_data(struct sock *sk, skb->len += copy; skb->data_len += copy; skb->truesize += copy; - refcount_add(copy, &sk->sk_wmem_alloc); + wmem_alloc_delta += copy; } offset += copy; length -= copy; } + refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); return 0; error_efault: @@ -1092,6 +1098,7 @@ static int __ip_append_data(struct sock *sk, error: cork->length -= length; IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); + refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); return err; } -- 2.17.0.rc1.321.gba9d0f2565-goog