Hi,
To adapt our network performance to modern high-bandwidth,
high-latency links, we have to increase the socket buffer size
limit. That also requires more mbuf clusters, so that we do not
run out of them.
This diff includes several aspects that somehow belong together.
- Increase the number of mbuf clusters on most architectures. I have
to guess how much memory a typical machine has. If the value is too
high, we may run out of kernel memory.
arch/alpha/include/param.h:#define NMBCLUSTERS (16 * 1026)
arch/amd64/include/param.h:#define NMBCLUSTERS (256 * 1024)
arch/arm/include/param.h:#define NMBCLUSTERS (16 * 1024)
arch/hppa/include/param.h:#define NMBCLUSTERS (4 * 1024)
arch/i386/include/param.h:#define NMBCLUSTERS (32 * 1024)
arch/m88k/include/param.h:#define NMBCLUSTERS (4 * 1024)
arch/mips64/include/param.h:#define NMBCLUSTERS (8 * 1024)
arch/powerpc/include/param.h:#define NMBCLUSTERS (4 * 1024)
arch/sh/include/param.h:#define NMBCLUSTERS (4 * 1024)
arch/sparc64/include/param.h:#define NMBCLUSTERS (16 * 1024)
- Set size of mclsizes array explicitly to keep it in sync with
mclpools.
- Limit all mbuf cluster pools to the same memory size. Limiting
them by count instead would allow the large clusters to use too
much memory.
- If sosend() cannot allocate a large cluster, try a small one as
fallback.
- Reduce the factor of the limits derived from NMBCLUSTERS. We
want the additional clusters in the socket buffer and not elsewhere.
- Increase the socket buffer size limit from 256 KB to 2 MB. If the
value is too high, we may run out of mbufs.
Feel free to comment on and ok the parts separately.
bluhm
Index: arch/alpha/include/param.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/arch/alpha/include/param.h,v
retrieving revision 1.39
diff -u -p -r1.39 param.h
--- arch/alpha/include/param.h 26 Mar 2013 05:04:08 -0000 1.39
+++ arch/alpha/include/param.h 2 Sep 2016 15:35:35 -0000
@@ -63,7 +63,7 @@
#define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area
*/
#define USPACE_ALIGN 0 /* u-area alignment
0-none */
-#define NMBCLUSTERS 4096 /* map size, max
cluster allocation */
+#define NMBCLUSTERS (16 * 1026) /* max cluster
allocation */
#ifndef MSGBUFSIZE
#define MSGBUFSIZE (2 * PAGE_SIZE) /* default message
buffer size */
Index: arch/amd64/include/param.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/include/param.h,v
retrieving revision 1.22
diff -u -p -r1.22 param.h
--- arch/amd64/include/param.h 26 Mar 2013 05:04:10 -0000 1.22
+++ arch/amd64/include/param.h 2 Sep 2016 15:58:26 -0000
@@ -72,7 +72,7 @@
#define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area
*/
#define USPACE_ALIGN 0 /* u-area alignment
0-none */
-#define NMBCLUSTERS 6144 /* map size, max
cluster allocation */
+#define NMBCLUSTERS (256 * 1024) /* max cluster
allocation */
#ifndef MSGBUFSIZE
#define MSGBUFSIZE (16 * PAGE_SIZE) /* default message
buffer size */
Index: arch/arm/include/param.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/arch/arm/include/param.h,v
retrieving revision 1.22
diff -u -p -r1.22 param.h
--- arch/arm/include/param.h 20 Aug 2016 19:31:01 -0000 1.22
+++ arch/arm/include/param.h 2 Sep 2016 16:00:12 -0000
@@ -55,7 +55,7 @@
#define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area
*/
#define USPACE_ALIGN 0 /* u-area alignment
0-none */
-#define NMBCLUSTERS 4096 /* map size, max
cluster allocation */
+#define NMBCLUSTERS (16 * 1024) /* max cluster
allocation */
/*
* Maximum size of the kernel malloc arena in PAGE_SIZE-sized
Index: arch/hppa/include/param.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/arch/hppa/include/param.h,v
retrieving revision 1.45
diff -u -p -r1.45 param.h
--- arch/hppa/include/param.h 26 Mar 2013 05:04:10 -0000 1.45
+++ arch/hppa/include/param.h 2 Sep 2016 15:39:12 -0000
@@ -51,7 +51,7 @@
#define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area
*/
#define USPACE_ALIGN 0 /* u-area alignment
0-none */
-#define NMBCLUSTERS 4096 /* map size, max
cluster allocation */
+#define NMBCLUSTERS (4 * 1024) /* max cluster
allocation */
#ifndef MSGBUFSIZE
#define MSGBUFSIZE (2 * PAGE_SIZE) /* default message
buffer size */
Index: arch/i386/include/param.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/arch/i386/include/param.h,v
retrieving revision 1.47
diff -u -p -r1.47 param.h
--- arch/i386/include/param.h 26 Mar 2013 05:04:10 -0000 1.47
+++ arch/i386/include/param.h 2 Sep 2016 15:39:43 -0000
@@ -67,7 +67,7 @@
#define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area
*/
#define USPACE_ALIGN 0 /* u-area alignment
0-none */
-#define NMBCLUSTERS 6144 /* map size, max
cluster allocation */
+#define NMBCLUSTERS (32 * 1024) /* max cluster
allocation */
#ifndef MSGBUFSIZE
#define MSGBUFSIZE (4 * PAGE_SIZE) /* default message
buffer size */
Index: arch/m88k/include/param.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/arch/m88k/include/param.h,v
retrieving revision 1.19
diff -u -p -r1.19 param.h
--- arch/m88k/include/param.h 1 Apr 2013 12:52:24 -0000 1.19
+++ arch/m88k/include/param.h 2 Sep 2016 15:45:03 -0000
@@ -64,7 +64,7 @@
#define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area
*/
#define USPACE_ALIGN 0 /* u-area alignment
0-none */
-#define NMBCLUSTERS 4096 /* map size, max cluster
allocation */
+#define NMBCLUSTERS (4 * 1024) /* max cluster
allocation */
#ifndef MSGBUFSIZE
#define MSGBUFSIZE PAGE_SIZE
Index: arch/mips64/include/param.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/arch/mips64/include/param.h,v
retrieving revision 1.34
diff -u -p -r1.34 param.h
--- arch/mips64/include/param.h 2 Nov 2015 20:13:45 -0000 1.34
+++ arch/mips64/include/param.h 2 Sep 2016 16:00:29 -0000
@@ -62,7 +62,7 @@
#define USPACE_ALIGN (2 * PAGE_SIZE) /* align to an even TLB
boundary */
#endif
-#define NMBCLUSTERS 4096 /* map size, max cluster
allocation */
+#define NMBCLUSTERS (8 * 1024) /* max cluster
allocation */
#ifndef MSGBUFSIZE
#if PAGE_SHIFT > 12
Index: arch/powerpc/include/param.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/arch/powerpc/include/param.h,v
retrieving revision 1.36
diff -u -p -r1.36 param.h
--- arch/powerpc/include/param.h 13 Jun 2013 11:29:20 -0000 1.36
+++ arch/powerpc/include/param.h 2 Sep 2016 15:46:12 -0000
@@ -58,7 +58,7 @@
#define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area
*/
#define USPACE_ALIGN 0 /* u-area alignment
0-none */
-#define NMBCLUSTERS 4096 /* map size, max
cluster allocation */
+#define NMBCLUSTERS (4 * 1024) /* max cluster
allocation */
/*
* Maximum size of the kernel malloc arena in PAGE_SIZE-sized
Index: arch/sh/include/param.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/arch/sh/include/param.h,v
retrieving revision 1.11
diff -u -p -r1.11 param.h
--- arch/sh/include/param.h 26 Mar 2013 05:04:10 -0000 1.11
+++ arch/sh/include/param.h 2 Sep 2016 15:59:29 -0000
@@ -80,7 +80,7 @@
#ifdef _KERNEL
-#define NMBCLUSTERS 4096 /* map size, max
cluster allocation */
+#define NMBCLUSTERS (4 * 1024) /* max cluster
allocation */
#ifndef MSGBUFSIZE
#define MSGBUFSIZE PAGE_SIZE /* default message
buffer size */
Index: arch/sparc64/include/param.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/arch/sparc64/include/param.h,v
retrieving revision 1.39
diff -u -p -r1.39 param.h
--- arch/sparc64/include/param.h 26 Mar 2013 05:04:10 -0000 1.39
+++ arch/sparc64/include/param.h 2 Sep 2016 15:48:11 -0000
@@ -135,7 +135,7 @@
#define USPACE (UPAGES * PAGE_SIZE) /* total size of u-area
*/
#define USPACE_ALIGN 0 /* u-area alignment
0-none */
-#define NMBCLUSTERS 4096 /* map size, max
cluster allocation */
+#define NMBCLUSTERS (16 * 1024) /* max cluster
allocation */
#ifndef MSGBUFSIZE
#define MSGBUFSIZE (1 * PAGE_SIZE)
Index: kern/uipc_mbuf.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.226
diff -u -p -r1.226 uipc_mbuf.c
--- kern/uipc_mbuf.c 13 Jun 2016 21:24:43 -0000 1.226
+++ kern/uipc_mbuf.c 2 Sep 2016 15:10:21 -0000
@@ -105,7 +105,7 @@ struct pool mbpool; /* mbuf pool */
struct pool mtagpool;
/* mbuf cluster pools */
-u_int mclsizes[] = {
+u_int mclsizes[MCLPOOLS] = {
MCLBYTES, /* must be at slot 0 */
4 * 1024,
8 * 1024,
@@ -179,15 +179,16 @@ mbinit(void)
void
nmbclust_update(void)
{
- int i;
+ unsigned int i, n;
+
/*
* Set the hard limit on the mclpools to the number of
* mbuf clusters the kernel is to support. Log the limit
* reached message max once a minute.
*/
for (i = 0; i < nitems(mclsizes); i++) {
- (void)pool_sethardlimit(&mclpools[i], nmbclust,
- mclpool_warnmsg, 60);
+ n = (unsigned long long)nmbclust * MCLBYTES / mclsizes[i];
+ (void)pool_sethardlimit(&mclpools[i], n, mclpool_warnmsg, 60);
/*
* XXX this needs to be reconsidered.
* Setting the high water mark to nmbclust is too high
@@ -195,7 +196,7 @@ nmbclust_update(void)
* allocations in interrupt context don't fail or mclgeti()
* drivers may end up with empty rings.
*/
- pool_sethiwat(&mclpools[i], nmbclust);
+ pool_sethiwat(&mclpools[i], n);
}
pool_sethiwat(&mbpool, nmbclust);
}
Index: kern/uipc_socket.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.155
diff -u -p -r1.155 uipc_socket.c
--- kern/uipc_socket.c 25 Aug 2016 14:13:19 -0000 1.155
+++ kern/uipc_socket.c 2 Sep 2016 15:12:38 -0000
@@ -546,6 +546,8 @@ m_getuio(struct mbuf **mp, int atomic, l
if (resid >= MINCLSIZE) {
MCLGETI(m, M_NOWAIT, NULL, ulmin(resid, MAXMCLBYTES));
if ((m->m_flags & M_EXT) == 0)
+ MCLGETI(m, M_NOWAIT, NULL, MCLBYTES);
+ if ((m->m_flags & M_EXT) == 0)
goto nopages;
mlen = m->m_ext.ext_size;
len = ulmin(mlen, resid);
Index: net/pfvar.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfvar.h,v
retrieving revision 1.436
diff -u -p -r1.436 pfvar.h
--- net/pfvar.h 20 Aug 2016 08:34:30 -0000 1.436
+++ net/pfvar.h 2 Sep 2016 16:03:30 -0000
@@ -1411,8 +1411,8 @@ struct pf_divert {
};
/* Fragment entries reference mbuf clusters, so base the default on that. */
-#define PFFRAG_FRENT_HIWAT (NMBCLUSTERS / 4) /* Number of entries */
-#define PFFRAG_FRAG_HIWAT (NMBCLUSTERS / 8) /* Number of packets */
+#define PFFRAG_FRENT_HIWAT (NMBCLUSTERS / 16) /* Number of entries */
+#define PFFRAG_FRAG_HIWAT (NMBCLUSTERS / 32) /* Number of packets */
#define PFR_KTABLE_HIWAT 1000 /* Number of tables */
#define PFR_KENTRY_HIWAT 200000 /* Number of table entries */
Index: netinet/tcp_subr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.152
diff -u -p -r1.152 tcp_subr.c
--- netinet/tcp_subr.c 31 Aug 2016 11:05:05 -0000 1.152
+++ netinet/tcp_subr.c 2 Sep 2016 16:03:24 -0000
@@ -120,7 +120,7 @@ u_int32_t tcp_now = 1;
#define TCB_INITIAL_HASH_SIZE 128
#endif
-int tcp_reass_limit = NMBCLUSTERS / 2; /* hardlimit for tcpqe_pool */
+int tcp_reass_limit = NMBCLUSTERS / 8; /* hardlimit for tcpqe_pool */
#ifdef TCP_SACK
int tcp_sackhole_limit = 32*1024; /* hardlimit for sackhl_pool */
#endif
Index: sys/socketvar.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.62
diff -u -p -r1.62 socketvar.h
--- sys/socketvar.h 25 Aug 2016 14:13:19 -0000 1.62
+++ sys/socketvar.h 2 Sep 2016 16:34:59 -0000
@@ -116,7 +116,7 @@ struct socket {
short sb_flags; /* flags, see below */
u_short sb_timeo; /* timeout for read/write */
} so_rcv, so_snd;
-#define SB_MAX (256*1024) /* default for max chars in
sockbuf */
+#define SB_MAX (2*1024*1024) /* default for max chars in
sockbuf */
#define SB_WAIT 0x04 /* someone is waiting for
data/space */
#define SB_SEL 0x08 /* someone is selecting */
#define SB_ASYNC 0x10 /* ASYNC I/O, need signals */