Here's the autoscaling patch I was mumbling about earlier this week. With this patch applied, the need to tune maxusers when one upgrades to a machine with more RAM should be removed in most cases. (This patch is only for -current; the mbuf changes mean it will not apply cleanly to -stable. I can put together a -stable patch if there is sufficient demand right now.)
Here's a quick look at the size of various memory allocations with various maxusers sizes and with the autoscaling patch:

With maxusers:

musers  mproc  mfiles  msocket  callout  nmbcl  nsfbuf  tcp hash size
    32    532    1064     1064     1612   1024    1024            512
    64   1044    2088     2088     3148   1536    1536            512
   128   2068    4136     4136     6220   2560    2560            512
   256   4116    8232     8232    12364   4608    4608            512

With autoscaling:

MB ram  mproc  mfiles  msocket  callout  nmbcl  nsfbuf  tcp hash size
    32    512    4096     2048     4624   1024    1024            512
    64   1024    8192     4096     9232   2048    1024            512
   128   2048   16384     8192    18448   4096    2048           1024
   256   4096   32768    16384    36880   8192    4096           2048
   384   6144   49152    24576    55312  12288    6144           3072
   512   8192   65536    32767    73744  16384    8192           4096

(Values above this start to flatten out due to #defined maximums.)

Note that, in general, calculations are of the following form:

    value = max(maxusers-derived value, autoscale-derived value);
    value = loader tuned value if present

As such, under no circumstances will people suddenly see a decrease in various parameters when they upgrade to an autoscaling kernel; only increases. I'm sure that there will be much commotion about which scaling factors are correct. To make changes to these easy, I have grouped all the mins, scaling factors, and maxes in param.h - tweaking them is quite simple. I included mins and maxes to make sure that autoscaling doesn't cause problems by creating low values on small-memory machines, and also so that it does not specify really high values on 2GB+ machines. The high case is what worries me; I have not heard much about how well maxsockets / nmbclusters > 32767 really works. If people running high-volume systems that actively use that many simultaneous sockets + clusters + files can report that this works well, I'd be glad to bump up the maxes.
Oh, there's one more kicker thrown in: I changed maxfilesperproc to equal 9/10ths of maxfiles, and maxprocperuid to equal 9/10ths of maxproc. With a default configuration, this will help to prevent a single user from forkbombing the system, or a single process from running it out of file handles.

Please review.

Thanks,

Mike "Silby" Silbersack
diff -u -r sys.old/alpha/alpha/machdep.c sys/alpha/alpha/machdep.c --- sys.old/alpha/alpha/machdep.c Sat Dec 8 16:05:15 2001 +++ sys/alpha/alpha/machdep.c Sat Dec 8 16:05:28 2001 @@ -556,7 +556,7 @@ kern_envp = bootinfo.envp; /* Do basic tuning, hz etc */ - init_param(); + init_hz(); /* * Initalize the (temporary) bootstrap console interface, so @@ -861,6 +861,9 @@ physmem -= (sz - nsz); } } + + /* Init basic tunables */ + init_param(alpha_ptob(physmem)); /* * Initialize error message buffer (at end of core). diff -u -r sys.old/i386/i386/machdep.c sys/i386/i386/machdep.c --- sys.old/i386/i386/machdep.c Sat Dec 8 16:04:54 2001 +++ sys/i386/i386/machdep.c Sat Dec 8 16:43:20 2001 @@ -1691,8 +1691,8 @@ else if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; - /* Init basic tunables, hz etc */ - init_param(); + /* Init hz */ + init_hz(); /* * make gdt memory segments, the code segment goes up to end of the @@ -1871,6 +1871,9 @@ getmemsize(first); /* now running on new page tables, configured,and u/iom is accessible */ + + /* Init basic tunables */ + init_param(ptoa(Maxmem)); /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) diff -u -r sys.old/ia64/ia64/machdep.c sys/ia64/ia64/machdep.c --- sys.old/ia64/ia64/machdep.c Sat Dec 8 16:04:52 2001 +++ sys/ia64/ia64/machdep.c Sat Dec 8 16:05:28 2001 @@ -522,8 +522,8 @@ /* get fpswa interface */ fpswa_interface = (FPSWA_INTERFACE*)IA64_PHYS_TO_RR7(bootinfo.bi_fpswa); - /* Init basic tunables, including hz */ - init_param(); + /* Init hz */ + init_hz(); p = getenv("kernelname"); if (p) @@ -623,6 +623,9 @@ phys_avail[phys_avail_cnt] = 0; Maxmem = physmem; + + /* Init basic tunables */ + init_param(ia64_ptob(physmem)); /* * Initialize error message buffer (at end of core). 
diff -u -r sys.old/kern/subr_mbuf.c sys/kern/subr_mbuf.c --- sys.old/kern/subr_mbuf.c Sat Dec 8 16:04:51 2001 +++ sys/kern/subr_mbuf.c Sat Dec 8 16:09:17 2001 @@ -151,15 +151,21 @@ static void tunable_mbinit(void *dummy) { + int automcls, autosfbuf; + /* Calculate autoscaled values, choose if greater. */ + + automcls = min(MAXAUTOMCLS, max(MINAUTOMCLS, MCLSPERMB * physmemMB)); + nmbclusters = max(automcls, NMBCLUSTERS); + autosfbuf = min(MAXAUTOSFBUF, max(MINAUTOSFBUF, SFBUFPERMB * physmemMB)); + nsfbufs = max(autosfbuf, NSFBUFS); + /* * This has to be done before VM init. */ - nmbclusters = NMBCLUSTERS; TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); nmbufs = NMBUFS; TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); - nsfbufs = NSFBUFS; TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); nmbcnt = NMBCNTS; TUNABLE_INT_FETCH("kern.ipc.nmbcnt", &nmbcnt); diff -u -r sys.old/kern/subr_param.c sys/kern/subr_param.c --- sys.old/kern/subr_param.c Sat Dec 8 16:04:51 2001 +++ sys/kern/subr_param.c Sat Dec 8 16:10:08 2001 @@ -90,39 +90,46 @@ */ struct buf *swbuf; +int physmemMB; + /* * Boot time overrides */ void -init_param(void) +init_param(u_int64_t membytes) { + int memsizemb; + int autoproc, autofiles; + + physmemMB = membytes / 1048576; - /* Base parameters */ + /* Calculate maxusers-derived values. 
*/ maxusers = MAXUSERS; TUNABLE_INT_FETCH("kern.maxusers", &maxusers); - hz = HZ; - TUNABLE_INT_FETCH("kern.hz", &hz); - tick = 1000000 / hz; - tickadj = howmany(30000, 60 * hz); /* can adjust 30ms in 60s */ - - /* The following can be overridden after boot via sysctl */ + nbuf = NBUF; maxproc = NPROC; - TUNABLE_INT_FETCH("kern.maxproc", &maxproc); maxfiles = MAXFILES; - TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles); - maxprocperuid = maxproc - 1; - maxfilesperproc = maxfiles; - - /* Cannot be changed after boot */ - nbuf = NBUF; - TUNABLE_INT_FETCH("kern.nbuf", &nbuf); #ifdef VM_SWZONE_SIZE_MAX maxswzone = VM_SWZONE_SIZE_MAX; #endif - TUNABLE_INT_FETCH("kern.maxswzone", &maxswzone); #ifdef VM_BCACHE_SIZE_MAX maxbcache = VM_BCACHE_SIZE_MAX; #endif + + /* Calculate autoscaled values, choose them if greater than above. */ + autoproc = min(MAXAUTOPROC, max(MINAUTOPROC, PROCPERMB * physmemMB)); + maxproc = max(maxproc, autoproc); + autofiles = min(MAXAUTOFILES, max(MINAUTOFILES, FILESPERMB * physmemMB)); + maxfiles = max(maxfiles, autofiles); + + /* Allow loader-specified tuneables to take effect. */ + TUNABLE_INT_FETCH("kern.maxproc", &maxproc); + TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles); + maxprocperuid = (maxproc * 9) / 10; + maxfilesperproc = (maxfiles * 9) / 10; + + TUNABLE_INT_FETCH("kern.nbuf", &nbuf); + TUNABLE_INT_FETCH("kern.maxswzone", &maxswzone); TUNABLE_INT_FETCH("kern.maxbcache", &maxbcache); ncallout = 16 + maxproc + maxfiles; TUNABLE_INT_FETCH("kern.ncallout", &ncallout); @@ -139,4 +146,16 @@ TUNABLE_QUAD_FETCH("kern.maxssiz", &maxssiz); sgrowsiz = SGROWSIZ; TUNABLE_QUAD_FETCH("kern.sgrowsiz", &sgrowsiz); +} + +/* + * Set hz. This must be called earlier in machdep.c than init_param(). 
+ */ +void +init_hz(void) +{ + hz = HZ; + TUNABLE_INT_FETCH("kern.hz", &hz); + tick = 1000000 / hz; + tickadj = howmany(30000, 60 * hz); /* can adjust 30ms in 60s */ } diff -u -r sys.old/kern/uipc_socket2.c sys/kern/uipc_socket2.c --- sys.old/kern/uipc_socket2.c Sat Dec 8 16:04:50 2001 +++ sys/kern/uipc_socket2.c Sat Dec 8 16:08:43 2001 @@ -1026,7 +1026,12 @@ */ static void init_maxsockets(void *ignored) { + int autosockets, maxuserssockets; + + autosockets = physmemMB * SOCKETSPERMB; + autosockets = min(MAXAUTOSOCKETS, max(MINAUTOSOCKETS, autosockets)); + maxuserssockets = 2 * (20 + (16 * maxusers)); + maxsockets = max(maxuserssockets, max(autosockets, nmbclusters)); TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); - maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters)); } SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL); diff -u -r sys.old/netinet/tcp_subr.c sys/netinet/tcp_subr.c --- sys.old/netinet/tcp_subr.c Sat Dec 8 16:04:42 2001 +++ sys/netinet/tcp_subr.c Sat Dec 8 16:10:31 2001 @@ -190,6 +190,7 @@ tcp_init() { int hashsize = TCBHASHSIZE; + int autohashsize; tcp_ccgen = 1; tcp_cleartaocache(); @@ -203,6 +204,13 @@ LIST_INIT(&tcb); tcbinfo.listhead = &tcb; + + /* Calculate autoscaled hash size, use if > default hash size. 
*/ + autohashsize = physmemMB * TCBHASHPERMB; + autohashsize = min(MAXAUTOTCBHASH, max(MINAUTOTCBHASH, autohashsize)); + while (!powerof2(autohashsize)) + autohashsize++; + hashsize = max(hashsize, autohashsize); TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize); if (!powerof2(hashsize)) { printf("WARNING: TCB hash size not a power of 2\n"); diff -u -r sys.old/powerpc/powerpc/machdep.c sys/powerpc/powerpc/machdep.c --- sys.old/powerpc/powerpc/machdep.c Sat Dec 8 16:04:39 2001 +++ sys/powerpc/powerpc/machdep.c Sat Dec 8 16:48:30 2001 @@ -436,7 +436,8 @@ __asm ("mtsprg 0, %0" :: "r"(globalp)); /* Init basic tunables, hz etc */ - init_param(); + init_hz(); + init_param(0); /* XXX - needs to be fed physmem for proper autoscaling */ /* setup curproc so the mutexes work */ diff -u -r sys.old/sparc64/sparc64/machdep.c sys/sparc64/sparc64/machdep.c --- sys.old/sparc64/sparc64/machdep.c Sat Dec 8 16:04:38 2001 +++ sys/sparc64/sparc64/machdep.c Sat Dec 8 16:47:29 2001 @@ -249,10 +249,10 @@ end = (vm_offset_t)_end; } - /* - * Initialize tunables. - */ - init_param(); + /* Init hz */ + init_hz(); + /* Init basic tuneables - XXX - this needs to be moved once maxmem exists +here. */ + init_param(0); #ifdef DDB kdb_init(); diff -u -r sys.old/sys/param.h sys/sys/param.h --- sys.old/sys/param.h Sat Dec 8 16:04:37 2001 +++ sys/sys/param.h Sat Dec 8 16:05:28 2001 @@ -230,6 +230,44 @@ #define ctodb(db) /* calculates pages to devblks */ \ ((db) << (PAGE_SHIFT - DEV_BSHIFT)) +/* + * Values used in autoscaling system structures based on RAM size. + * + * Although settings are scattered across various subsystems, a + * common formula is followed. Generally, there are three + * possible values to choose from: The value suggested by maxusers, + * the value suggested by the autoscaling formula, and a manually + * tuned value from loader.conf. If a manually tuned value is specified, + * this value will be used. 
Otherwise, the maximum of the maxusers + * and autoscaled setting will be used. + * + */ + +/* Max processes, files. These are set in subr_param.c */ +#define PROCPERMB 16 +#define MINAUTOPROC 256 +#define MAXAUTOPROC 32000 +#define FILESPERMB 128 +#define MINAUTOFILES 1024 +#define MAXAUTOFILES 65536 + +/* Max sockets. These are set in uipc_socket2.c */ +#define SOCKETSPERMB 64 +#define MINAUTOSOCKETS 512 +#define MAXAUTOSOCKETS 32000 + +/* Max mbuf clusters, sendfile buffers. These are set in subr_mbuf.c */ +#define MCLSPERMB 32 +#define MINAUTOMCLS 512 +#define MAXAUTOMCLS 32000 +#define SFBUFPERMB 16 +#define MINAUTOSFBUF 1024 +#define MAXAUTOSFBUF 32000 + +/* Number of TCP hash buckets. These are set in tcp_subr.c */ +#define TCBHASHPERMB 8 +#define MINAUTOTCBHASH 512 +#define MAXAUTOTCBHASH 8192 /* * Make this available for most of the kernel. There were too many diff -u -r sys.old/sys/systm.h sys/sys/systm.h --- sys.old/sys/systm.h Sat Dec 8 16:04:37 2001 +++ sys/sys/systm.h Sat Dec 8 16:07:45 2001 @@ -60,6 +60,7 @@ extern struct cv selwait; /* select conditional variable */ extern int physmem; /* physical memory */ +extern int physmemMB; /* physical memory size in megabytes */ extern dev_t dumpdev; /* dump device */ extern long dumplo; /* offset into dumpdev */ @@ -121,7 +122,8 @@ void cpu_boot __P((int)); void cpu_rootconf __P((void)); -void init_param __P((void)); +void init_hz __P((void)); +void init_param __P((u_int64_t)); void tablefull __P((const char *)); int kvprintf __P((char const *, void (*)(int, void*), void *, int, _BSD_VA_LIST_)) __printflike(1, 0);