Here's the autoscaling patch I was mumbling about earlier this week.
With this patch applied, the necessity of tuning maxusers when one
upgrades to a machine with more ram should be removed in most cases.
(This patch is only for -current; the mbuf changes will make it not apply
cleanly to -stable.  I can put together a -stable patch if there is
sufficient demand right now.)

Here's a quick look at the size of various memory allocations with various
maxusers sizes and with the autoscaling patch:

With maxusers:

musers  mproc   mfiles  msocket callout nmbcl   nsfbuf  tcp hash size
32      532     1064    1064    1612    1024    1024    512
64      1044    2088    2088    3148    1536    1536    512
128     2068    4136    4136    6220    2560    2560    512
256     4116    8232    8232    12364   4608    4608    512

With autoscaling:

MB ram  mproc   mfiles  msocket callout nmbcl   nsfbuf  tcp hash size
32      512     4096    2048    4624    1024    1024    512
64      1024    8192    4096    9232    2048    1024    512
128     2048    16384   8192    18448   4096    2048    1024
256     4096    32768   16384   36880   8192    4096    2048
384     6144    49152   24576   55312   12288   6144    3072
512     8192    65536   32767   73744   16384   8192    4096
(Values above this start to flatten out due to #defined maximums)

Note that in general calculations are of the following form:

value = max(maxusers-derived value, autoscale-derived value);
value = loader tuned value if present

As such, under no circumstances will people suddenly see a decrease in
various parameters when they upgrade to an autoscaling kernel; only
increases.

I'm sure that there will be much commotion about what scaling factors are
correct.  To make changes to these easy, I have grouped all the mins,
scaling factors, and maxes in param.h - tweaking them is quite simple.

I included mins and maxes to make sure that autoscaling doesn't cause
problems by creating low values on small memory machines and also so that
it does not specify really high values on 2GB+ machines.  The high case is
what worries me; I have not heard much about how well maxsockets /
nmbclusters > 32767 really works.  If people running high volume systems
that actively use that many simultaneous sockets + clusters + files can
confirm that it works, I'd be glad to bump up the maxes.

Oh, there's one more kicker thrown in; I changed maxfilesperproc to equal
9/10ths of maxfiles, and maxprocperuid to equal 9/10ths of maxproc; this'll
help to prevent a single process or user from forkbombing the system or
running it out of file handles with a default configuration.

Please review.

Thanks,

Mike "Silby" Silbersack
diff -u -r sys.old/alpha/alpha/machdep.c sys/alpha/alpha/machdep.c
--- sys.old/alpha/alpha/machdep.c       Sat Dec  8 16:05:15 2001
+++ sys/alpha/alpha/machdep.c   Sat Dec  8 16:05:28 2001
@@ -556,7 +556,7 @@
                kern_envp = bootinfo.envp;
 
        /* Do basic tuning, hz etc */
-       init_param();
+       init_hz();
 
        /*
         * Initalize the (temporary) bootstrap console interface, so
@@ -861,6 +861,9 @@
                        physmem -= (sz - nsz);
                }
        }
+
+       /* Init basic tunables */
+       init_param(alpha_ptob(physmem));
 
        /*
         * Initialize error message buffer (at end of core).
diff -u -r sys.old/i386/i386/machdep.c sys/i386/i386/machdep.c
--- sys.old/i386/i386/machdep.c Sat Dec  8 16:04:54 2001
+++ sys/i386/i386/machdep.c     Sat Dec  8 16:43:20 2001
@@ -1691,8 +1691,8 @@
        else if (bootinfo.bi_envp)
                kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
 
-       /* Init basic tunables, hz etc */
-       init_param();
+       /* Init hz */
+       init_hz();
 
        /*
         * make gdt memory segments, the code segment goes up to end of the
@@ -1871,6 +1871,9 @@
        getmemsize(first);
 
        /* now running on new page tables, configured,and u/iom is accessible */
+
+       /* Init basic tunables */
+       init_param(ptoa(Maxmem));
 
        /* Map the message buffer. */
        for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
diff -u -r sys.old/ia64/ia64/machdep.c sys/ia64/ia64/machdep.c
--- sys.old/ia64/ia64/machdep.c Sat Dec  8 16:04:52 2001
+++ sys/ia64/ia64/machdep.c     Sat Dec  8 16:05:28 2001
@@ -522,8 +522,8 @@
        /* get fpswa interface */
        fpswa_interface = (FPSWA_INTERFACE*)IA64_PHYS_TO_RR7(bootinfo.bi_fpswa);
 
-       /* Init basic tunables, including hz */
-       init_param();
+       /* Init hz */
+       init_hz();
 
        p = getenv("kernelname");
        if (p)
@@ -623,6 +623,9 @@
        phys_avail[phys_avail_cnt] = 0;
 
        Maxmem = physmem;
+
+       /* Init basic tunables */
+       init_param(ia64_ptob(physmem));
 
        /*
         * Initialize error message buffer (at end of core).
diff -u -r sys.old/kern/subr_mbuf.c sys/kern/subr_mbuf.c
--- sys.old/kern/subr_mbuf.c    Sat Dec  8 16:04:51 2001
+++ sys/kern/subr_mbuf.c        Sat Dec  8 16:09:17 2001
@@ -151,15 +151,21 @@
 static void
 tunable_mbinit(void *dummy)
 {
+       int automcls, autosfbuf;
 
+       /* Calculate autoscaled values, choose if greater. */
+
+       automcls = min(MAXAUTOMCLS, max(MINAUTOMCLS, MCLSPERMB * physmemMB));
+       nmbclusters = max(automcls, NMBCLUSTERS);
+       autosfbuf = min(MAXAUTOSFBUF, max(MINAUTOSFBUF, SFBUFPERMB * physmemMB));
+       nsfbufs = max(autosfbuf, NSFBUFS);
+       
        /*
         * This has to be done before VM init.
         */
-       nmbclusters = NMBCLUSTERS;
        TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
        nmbufs = NMBUFS;
        TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
-       nsfbufs = NSFBUFS;
        TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
        nmbcnt = NMBCNTS;
        TUNABLE_INT_FETCH("kern.ipc.nmbcnt", &nmbcnt);
diff -u -r sys.old/kern/subr_param.c sys/kern/subr_param.c
--- sys.old/kern/subr_param.c   Sat Dec  8 16:04:51 2001
+++ sys/kern/subr_param.c       Sat Dec  8 16:10:08 2001
@@ -90,39 +90,46 @@
  */
 struct buf *swbuf;
 
+int physmemMB;
+
 /*
  * Boot time overrides
  */
 void
-init_param(void)
+init_param(u_int64_t membytes)
 {
+       int memsizemb;
+       int autoproc, autofiles;
+
+       physmemMB = membytes / 1048576;
 
-       /* Base parameters */
+       /* Calculate maxusers-derived values. */
        maxusers = MAXUSERS;
        TUNABLE_INT_FETCH("kern.maxusers", &maxusers);
-       hz = HZ;
-       TUNABLE_INT_FETCH("kern.hz", &hz);
-       tick = 1000000 / hz;
-       tickadj = howmany(30000, 60 * hz);      /* can adjust 30ms in 60s */
-
-       /* The following can be overridden after boot via sysctl */
+       nbuf = NBUF;
        maxproc = NPROC;
-       TUNABLE_INT_FETCH("kern.maxproc", &maxproc);
        maxfiles = MAXFILES;
-       TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles);
-       maxprocperuid = maxproc - 1;
-       maxfilesperproc = maxfiles;
-
-       /* Cannot be changed after boot */
-       nbuf = NBUF;
-       TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
 #ifdef VM_SWZONE_SIZE_MAX
        maxswzone = VM_SWZONE_SIZE_MAX;
 #endif
-       TUNABLE_INT_FETCH("kern.maxswzone", &maxswzone);
 #ifdef VM_BCACHE_SIZE_MAX
        maxbcache = VM_BCACHE_SIZE_MAX;
 #endif
+
+       /* Calculate autoscaled values, choose them if greater than above. */
+       autoproc = min(MAXAUTOPROC, max(MINAUTOPROC, PROCPERMB * physmemMB));
+       maxproc = max(maxproc, autoproc);
+       autofiles = min(MAXAUTOFILES, max(MINAUTOFILES, FILESPERMB * physmemMB));
+       maxfiles = max(maxfiles, autofiles);
+
+       /* Allow loader-specified tuneables to take effect. */
+       TUNABLE_INT_FETCH("kern.maxproc", &maxproc);
+       TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles);
+       maxprocperuid = (maxproc * 9) / 10;
+       maxfilesperproc = (maxfiles * 9) / 10;
+
+       TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
+       TUNABLE_INT_FETCH("kern.maxswzone", &maxswzone);
        TUNABLE_INT_FETCH("kern.maxbcache", &maxbcache);
        ncallout = 16 + maxproc + maxfiles;
        TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
@@ -139,4 +146,16 @@
        TUNABLE_QUAD_FETCH("kern.maxssiz", &maxssiz);
        sgrowsiz = SGROWSIZ;
        TUNABLE_QUAD_FETCH("kern.sgrowsiz", &sgrowsiz);
+}
+
+/*
+ * Set hz.  This must be called earlier in machdep.c than init_param().
+ */
+void
+init_hz(void)
+{
+       hz = HZ;
+       TUNABLE_INT_FETCH("kern.hz", &hz);
+       tick = 1000000 / hz;
+       tickadj = howmany(30000, 60 * hz);      /* can adjust 30ms in 60s */
 }
diff -u -r sys.old/kern/uipc_socket2.c sys/kern/uipc_socket2.c
--- sys.old/kern/uipc_socket2.c Sat Dec  8 16:04:50 2001
+++ sys/kern/uipc_socket2.c     Sat Dec  8 16:08:43 2001
@@ -1026,7 +1026,12 @@
  */
 static void init_maxsockets(void *ignored)
 {
+       int autosockets, maxuserssockets;
+       
+       autosockets = physmemMB * SOCKETSPERMB;
+       autosockets = min(MAXAUTOSOCKETS, max(MINAUTOSOCKETS, autosockets));
+       maxuserssockets = 2 * (20 + (16 * maxusers)); 
+       maxsockets = max(maxuserssockets, max(autosockets, nmbclusters));
        TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
-       maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
 }
 SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
diff -u -r sys.old/netinet/tcp_subr.c sys/netinet/tcp_subr.c
--- sys.old/netinet/tcp_subr.c  Sat Dec  8 16:04:42 2001
+++ sys/netinet/tcp_subr.c      Sat Dec  8 16:10:31 2001
@@ -190,6 +190,7 @@
 tcp_init()
 {
        int hashsize = TCBHASHSIZE;
+       int autohashsize;
        
        tcp_ccgen = 1;
        tcp_cleartaocache();
@@ -203,6 +204,13 @@
 
        LIST_INIT(&tcb);
        tcbinfo.listhead = &tcb;
+
+       /* Calculate autoscaled hash size, use if > default hash size. */
+       autohashsize = physmemMB * TCBHASHPERMB;
+       autohashsize = min(MAXAUTOTCBHASH, max(MINAUTOTCBHASH, autohashsize));
+       while (!powerof2(autohashsize))
+               autohashsize++;
+       hashsize = max(hashsize, autohashsize);
        TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize);
        if (!powerof2(hashsize)) {
                printf("WARNING: TCB hash size not a power of 2\n");
diff -u -r sys.old/powerpc/powerpc/machdep.c sys/powerpc/powerpc/machdep.c
--- sys.old/powerpc/powerpc/machdep.c   Sat Dec  8 16:04:39 2001
+++ sys/powerpc/powerpc/machdep.c       Sat Dec  8 16:48:30 2001
@@ -436,7 +436,8 @@
        __asm ("mtsprg 0, %0" :: "r"(globalp));
 
        /* Init basic tunables, hz etc */
-       init_param();
+       init_hz();
+       init_param(0); /* XXX - needs to be fed physmem for proper autoscaling */
 
        /* setup curproc so the mutexes work */
 
diff -u -r sys.old/sparc64/sparc64/machdep.c sys/sparc64/sparc64/machdep.c
--- sys.old/sparc64/sparc64/machdep.c   Sat Dec  8 16:04:38 2001
+++ sys/sparc64/sparc64/machdep.c       Sat Dec  8 16:47:29 2001
@@ -249,10 +249,10 @@
                end = (vm_offset_t)_end;
        }
 
-       /*
-        * Initialize tunables.
-        */
-       init_param();
+       /* Init hz */
+       init_hz();
+       /* Init basic tuneables - XXX - this needs to be moved once maxmem exists here. */
+       init_param(0);
 
 #ifdef DDB
        kdb_init();
diff -u -r sys.old/sys/param.h sys/sys/param.h
--- sys.old/sys/param.h Sat Dec  8 16:04:37 2001
+++ sys/sys/param.h     Sat Dec  8 16:05:28 2001
@@ -230,6 +230,44 @@
 #define ctodb(db)                      /* calculates pages to devblks */ \
        ((db) << (PAGE_SHIFT - DEV_BSHIFT))
 
+/*
+ * Values used in autoscaling system structures based on RAM size.
+ *
+ * Although settings are scattered across various subsystems, a
+ * common formula is followed.  Generally, there are three
+ * possible values to choose from:  The value suggested by maxusers,
+ * the value suggested by the autoscaling formula, and a manually
+ * tuned value from loader.conf.  If a manually tuned value is specified,
+ * this value will be used.  Otherwise, the maximum of the maxusers
+ * and autoscaled setting will be used.
+ *
+ */
+
+/* Max processes, files.  These are set in subr_param.c */
+#define PROCPERMB 16
+#define MINAUTOPROC 256
+#define MAXAUTOPROC 32000
+#define FILESPERMB 128
+#define MINAUTOFILES 1024
+#define MAXAUTOFILES 65536
+
+/* Max sockets.  These are set in uipc_socket2.c */
+#define SOCKETSPERMB 64
+#define MINAUTOSOCKETS 512
+#define MAXAUTOSOCKETS 32000
+
+/* Max mbuf clusters, sendfile buffers.  These are set in subr_mbuf.c */
+#define MCLSPERMB 32
+#define MINAUTOMCLS 512
+#define MAXAUTOMCLS 32000
+#define SFBUFPERMB 16
+#define MINAUTOSFBUF 1024
+#define MAXAUTOSFBUF 32000
+
+/* Number of TCP hash buckets.  These are set in tcp_subr.c */
+#define TCBHASHPERMB 8
+#define MINAUTOTCBHASH 512
+#define MAXAUTOTCBHASH 8192
 
 /*
  * Make this available for most of the kernel.  There were too many
diff -u -r sys.old/sys/systm.h sys/sys/systm.h
--- sys.old/sys/systm.h Sat Dec  8 16:04:37 2001
+++ sys/sys/systm.h     Sat Dec  8 16:07:45 2001
@@ -60,6 +60,7 @@
 extern struct cv selwait;      /* select conditional variable */
 
 extern int physmem;            /* physical memory */
+extern int physmemMB;          /* physical memory size in megabytes */
 
 extern dev_t dumpdev;          /* dump device */
 extern long dumplo;            /* offset into dumpdev */
@@ -121,7 +122,8 @@
 
 void   cpu_boot __P((int));
 void   cpu_rootconf __P((void));
-void   init_param __P((void));
+void   init_hz __P((void));
+void   init_param __P((u_int64_t));
 void   tablefull __P((const char *));
 int    kvprintf __P((char const *, void (*)(int, void*), void *, int,
                      _BSD_VA_LIST_)) __printflike(1, 0);

Reply via email to