Devices may have limits on the number of fragments in an skb they support.
Current codebase uses a constant as maximum for number of fragments one
skb can hold and use.
When enabling scatter/gather and running traffic with many small messages
the codebase uses the maximum number of fragments and may thereby violate
the max for certain devices.
The patch introduces a global variable as max number of fragments in
scatter/gather.

Signed-off-by: Hans Westgaard Ry <hans.westgaard...@oracle.com>
Reviewed-by: HÃ¥kon Bugge <haakon.bu...@oracle.com>

---
 include/net/tcp.h          |  2 ++
 net/ipv4/sysctl_net_ipv4.c | 10 ++++++++++
 net/ipv4/tcp.c             |  8 +++++---
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f80e74c..8d18df3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -285,6 +285,8 @@ extern int sysctl_tcp_invalid_ratelimit;
 extern int sysctl_tcp_pacing_ss_ratio;
 extern int sysctl_tcp_pacing_ca_ratio;
 
+extern int sysctl_tcp_sg_max_skb_frags;
+
 extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
 extern int tcp_memory_pressure;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a0bd7a5..498dcf9 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -40,6 +40,7 @@ static int ip_ttl_min = 1;
 static int ip_ttl_max = 255;
 static int tcp_syn_retries_min = 1;
 static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
+static int tcp_sg_max_skb_frags = MAX_SKB_FRAGS;
 static int ip_ping_group_range_min[] = { 0, 0 };
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 
@@ -761,6 +762,15 @@ static struct ctl_table ipv4_table[] = {
                .proc_handler   = proc_dointvec_ms_jiffies,
        },
        {
+               .procname       = "tcp_sg_max_skb_frags",
+               .data           = &sysctl_tcp_sg_max_skb_frags,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &one,
+               .extra2         = &tcp_sg_max_skb_frags,
+       },
+       {
                .procname       = "icmp_msgs_per_sec",
                .data           = &sysctl_icmp_msgs_per_sec,
                .maxlen         = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c82cca1..928d2c7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -300,7 +300,9 @@ EXPORT_SYMBOL(sysctl_tcp_wmem);
 
 atomic_long_t tcp_memory_allocated;    /* Current allocated memory. */
 EXPORT_SYMBOL(tcp_memory_allocated);
-
+/* maximum number of fragments for tcp scatter/gather */
+int sysctl_tcp_sg_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
+EXPORT_SYMBOL(sysctl_tcp_sg_max_skb_frags);
 /*
  * Current number of TCP sockets.
  */
@@ -938,7 +940,7 @@ new_segment:
 
                i = skb_shinfo(skb)->nr_frags;
                can_coalesce = skb_can_coalesce(skb, i, page, offset);
-               if (!can_coalesce && i >= MAX_SKB_FRAGS) {
+               if (!can_coalesce && i >= sysctl_tcp_sg_max_skb_frags) {
                        tcp_mark_push(tp, skb);
                        goto new_segment;
                }
@@ -1211,7 +1213,7 @@ new_segment:
 
                        if (!skb_can_coalesce(skb, i, pfrag->page,
                                              pfrag->offset)) {
-                               if (i == MAX_SKB_FRAGS || !sg) {
+                               if (i == sysctl_tcp_sg_max_skb_frags || !sg) {
                                        tcp_mark_push(tp, skb);
                                        goto new_segment;
                                }
-- 
2.4.3

Reply via email to