From: Eric Dumazet <eduma...@google.com> I have had many reports that the TSQ logic breaks wifi aggregation.
The current logic is to allow up to 1 ms worth of bytes to be queued into the qdisc and driver queues. But wifi aggregation needs a bigger budget to allow bigger rates to be discovered by the various TCP congestion control algorithms. This patch adds an extra socket field, allowing wifi drivers to select another log scale to derive the TCP Small Queue credit from the current pacing rate. The initial value is 10, meaning that this patch does not change current behavior. We expect wifi drivers to set this field to smaller values (tests have been done with values from 6 to 9). They would have to use the following template: if (skb->sk && skb->sk->sk_pacing_shift != MY_PACING_SHIFT) skb->sk->sk_pacing_shift = MY_PACING_SHIFT; Ref: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1670041 Signed-off-by: Eric Dumazet <eduma...@google.com> Cc: Johannes Berg <johannes.b...@intel.com> Cc: Toke Høiland-Jørgensen <t...@toke.dk> Cc: Kir Kolyshkin <k...@openvz.org> --- include/net/sock.h | 1 + net/core/sock.c | 1 + net/ipv4/tcp_output.c | 4 ++-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 688a823dccc306bd21f47da167c6922161af5a6a..fb0e5194a3bce61fac00fc234d2a5d1bb3c60f35 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -451,6 +451,7 @@ struct sock { kmemcheck_bitfield_end(flags); u16 sk_gso_max_segs; + u8 sk_pacing_shift; unsigned long sk_lingertime; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; diff --git a/net/core/sock.c b/net/core/sock.c index c59bcf90d90536fedc7809e397f6bd414781b529..2811ff8322d4a5f68e3e745cf585564e1ec5d809 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2746,6 +2746,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_max_pacing_rate = ~0U; sk->sk_pacing_rate = ~0U; + sk->sk_pacing_shift = 10; sk->sk_incoming_cpu = -1; /* * Before updating sk_refcnt, we must commit prior changes to memory diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 
9b98d35aa0d8d0a829e4a41985d805d4e2895a8e..fa5e7b81b5ec12039b1347474f5183b1d9c87887 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1737,7 +1737,7 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, { u32 bytes, segs; - bytes = min(sk->sk_pacing_rate >> 10, + bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift, sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); /* Goal is to send at least one packet per ms, @@ -2215,7 +2215,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, { unsigned int limit; - limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); + limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift); limit = min_t(u32, limit, sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); limit <<= factor;