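The original TSQ algorithm limits the amount of data queued in the
qdisc/device to two packets or ~1 ms worth of data at the current pacing
rate, whichever is larger. This commit adds two sysctl knobs,
tcp_limit_output_pkt and tcp_limit_output_ms, to allow tuning the number
of packets and the time value (in ms), respectively.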

Signed-off-by: Natale Patriciello <natale.patricie...@gmail.com>
Cc: Carlo Augusto Grazia <carloaugusto.gra...@unimore.it>
Tested-by: Carlo Augusto Grazia <carloaugusto.gra...@unimore.it>
---
 Documentation/networking/ip-sysctl.txt | 23 ++++++++++++++++++++++-
 include/net/netns/ipv4.h               |  2 ++
 net/ipv4/sysctl_net_ipv4.c             | 14 ++++++++++++++
 net/ipv4/tcp_output.c                  |  5 ++++-
 4 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 3b530fe8a494..2510ef885746 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -721,9 +721,30 @@ tcp_limit_output_bytes - INTEGER
        typical pfifo_fast qdiscs.
        tcp_limit_output_bytes limits the number of bytes on qdisc
        or device to reduce artificial RTT/cwnd and reduce bufferbloat.
-       Set to -1 to disable.
+       The overall limit is given by the following (rate is in B/ms):
+       limit = min(output_bytes, max(output_pkt * mss, output_ms * rate))
+       Set to -1 to unconditionally disable TSQ, regardless of the
+       values of tcp_limit_output_ms and tcp_limit_output_pkt.
        Default: 262144
 
+tcp_limit_output_ms - UNSIGNED INTEGER
+       Controls TCP Small Queue limit per TCP socket, from a time point
+       of view. Given a transmission rate, limit the bytes on qdisc or
+       device to a value that can be transmitted approximately in the
+       time (in ms) provided in this parameter at the given rate. This
+       limit is doubled for retransmissions. The overall limit is given
+       by the following (rate is in B/ms):
+       limit = min(output_bytes, max(output_pkt * mss, output_ms * rate))
+       Default: 1
+
+tcp_limit_output_pkt - UNSIGNED INTEGER
+       Controls TCP Small Queue limit per TCP socket.
+       tcp_limit_output_pkt limits the number of packets queued in
+       qdisc/device. This limit is doubled for retransmissions.
+       The overall limit is given by the following (rate is in B/ms):
+       limit = min(output_bytes, max(output_pkt * mss, output_ms * rate))
+       Default: 2
+
 tcp_challenge_ack_limit - INTEGER
        Limits number of Challenge ACK sent per second, as recommended
        in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 44668c29701a..e2c06827d0bb 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -148,6 +148,8 @@ struct netns_ipv4 {
        int sysctl_tcp_tso_win_divisor;
        int sysctl_tcp_workaround_signed_windows;
        int sysctl_tcp_limit_output_bytes;
+       unsigned int sysctl_tcp_limit_output_ms;
+       unsigned int sysctl_tcp_limit_output_pkt;
        int sysctl_tcp_challenge_ack_limit;
        int sysctl_tcp_min_tso_segs;
        int sysctl_tcp_min_rtt_wlen;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 93e172118a94..775a4d079a9b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1094,6 +1094,20 @@ static struct ctl_table ipv4_net_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec
        },
+       {
+               .procname       = "tcp_limit_output_ms",
+               .data           = &init_net.ipv4.sysctl_tcp_limit_output_ms,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_douintvec
+       },
+       {
+               .procname       = "tcp_limit_output_pkt",
+               .data           = &init_net.ipv4.sysctl_tcp_limit_output_pkt,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_douintvec
+       },
        {
                .procname       = "tcp_challenge_ack_limit",
                .data           = &init_net.ipv4.sysctl_tcp_challenge_ack_limit,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 997a6fbdbe1a..eae715c4a005 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2201,7 +2201,10 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
        if (sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes < 0)
                return false;
 
-       limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
+       limit = sock_net(sk)->ipv4.sysctl_tcp_limit_output_ms *
+               (sk->sk_pacing_rate >> 10);
+       limit = max(sock_net(sk)->ipv4.sysctl_tcp_limit_output_pkt * 
skb->truesize,
+                   limit);
        limit = min_t(u32, limit,
                      sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
        limit <<= factor;
-- 
2.15.1

Reply via email to