On 13.01.2013 11:10, Alan Cox wrote:
On 01/07/2013 12:47, Oleksandr Tymoshenko wrote:
On 12/27/2012 6:46 PM, Oleksandr Tymoshenko wrote:
On 12/18/2012 1:59 AM, Alan Cox wrote:
On 12/17/2012 23:40, Oleksandr Tymoshenko wrote:
On 2012-12-08, at 1:21 PM, Alan Cox <a...@rice.edu> wrote:
That makes sense.  However, "virtual_avail" isn't the start of the
kernel address space.  The kernel map always starts at
VM_MIN_KERNEL_ADDRESS.  (See kmem_init().)  "virtual_avail" represents
the next unallocated virtual address in the kernel address space at an
early point in initialization.  "virtual_avail" and "virtual_end" aren't
used after that, or outside the VM system.  Please use
vm_map_min(kernel_map) and vm_map_max(kernel_map) instead.

I checked: kernel_map is not available (NULL) at this point, so we can't
use it to determine the real KVA size.  The closest thing we can get is the
virtual_avail/virtual_end pair.

Andre, could you approve the attached patch for commit or suggest a better
solution?

Any update on this one? Can I proceed with commit?


Yes, I've now spent a little bit of time looking at this, and I don't
see why these calculations and tunable_mbinit() need to be performed
before the kernel map is initialized.

Let me summarize what I found:

1. The function tunable_mbinit() now has a dependency on the global
variable maxmbufmem.  tunable_mbinit() is executed under
SI_SUB_TUNABLES.  tunable_mbinit() defines the global variable
nmbclusters.  The statements made in the comment at the head of
tunable_mbinit() all appear to be false:

/*
  * tunable_mbinit() has to be run before init_maxsockets() thus
  * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets()
  * runs at SI_ORDER_ANY.
  *
  * NB: This has to be done before VM init.
  */

I don't see anything in init_maxsockets() that depends on
tunable_mbinit().  Moreover, the statement about "VM init" is only
correct if you regard the initialization of the kernel's malloc as "VM
init".

This seems to be historic cruft.  The dependency on maxsockets was
removed recently with the autotuning improvements.

A patch moving the maxmbufmem calculation into tunable_mbinit() and
changing it to SI_SUB_KMEM, which comes after the VM initialization, is
attached.

2. The function kmeminit() in kern/kern_malloc.c has a dependency on the
global variable nmbclusters.  kmeminit() is executed under SI_SUB_KMEM,
which comes after the initialization of the virtual memory system,
including the kernel map.

The use of nmbclusters in kmeminit seems to be bogus.  I think it comes
from the times when the mbuf allocator was directly layered on top of
the VM, that is before UMA.

kmeminit() should not use nmbclusters.  The computations done in kmeminit()
do not make a whole lot of sense to me. But I'm no expert in that area.

3. The function vm_ksubmap_init() has a dependency on the global
variable maxpipekva.  vm_ksubmap_init() is executed under SI_SUB_CPU,
which comes after SI_SUB_KMEM.

Am I missing anything?

I'm attaching a patch that defers the calculation of maxpipekva until we
actually need it in vm_ksubmap_init().  Any comments on this patch are
welcome.

Looks good to me.  Perhaps the whole calculation and setup of the pipe_map
could be moved to kern/sys_pipe.c:pipeinit() to have it all together.

--
Andre

Index: sys/mbuf.h
===================================================================
--- sys/mbuf.h  (revision 245423)
+++ sys/mbuf.h  (working copy)
@@ -384,7 +384,6 @@
  *
  * The rest of it is defined in kern/kern_mbuf.c
  */
-extern quad_t          maxmbufmem;
 extern uma_zone_t      zone_mbuf;
 extern uma_zone_t      zone_clust;
 extern uma_zone_t      zone_pack;
Index: kern/kern_mbuf.c
===================================================================
--- kern/kern_mbuf.c    (revision 245423)
+++ kern/kern_mbuf.c    (working copy)
@@ -47,6 +47,7 @@
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
+#include <vm/vm_map.h>
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 #include <vm/uma_dbg.h>
@@ -104,16 +105,25 @@
 struct mbstat mbstat;
 
 /*
- * tunable_mbinit() has to be run before init_maxsockets() thus
- * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets()
- * runs at SI_ORDER_ANY.
- *
- * NB: This has to be done before VM init.
+ * tunable_mbinit() has to be run before any mbuf allocations are done.
  */
 static void
 tunable_mbinit(void *dummy)
 {
+       quad_t realmem, maxmbufmem;
 
+       /*
+        * The default limit for all mbuf related memory is 1/2 of all
+        * available kernel memory (physical or kmem).
+        * At most it can be 3/4 of available kernel memory.
+        */
+       realmem = qmin((quad_t)physmem * PAGE_SIZE,
+           vm_map_max(kernel_map) - vm_map_min(kernel_map));
+       maxmbufmem = realmem / 2;
+       TUNABLE_QUAD_FETCH("kern.maxmbufmem", &maxmbufmem);
+       if (maxmbufmem > realmem / 4 * 3)
+               maxmbufmem = realmem / 4 * 3;
+
        TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
        if (nmbclusters == 0)
                nmbclusters = maxmbufmem / MCLBYTES / 4;
@@ -139,7 +149,7 @@
                nmbufs = lmax(maxmbufmem / MSIZE / 5,
                    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
 }
-SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
+SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
 
 static int
 sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
@@ -279,16 +289,14 @@
 static void    mb_zfini_pack(void *, int);
 
 static void    mb_reclaim(void *);
-static void    mbuf_init(void *);
 static void    *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int);
 
-/* Ensure that MSIZE must be a power of 2. */
+/* Ensure that MSIZE is a power of 2. */
 CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
 
 /*
  * Initialize FreeBSD Network buffer allocation.
  */
-SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
 static void
 mbuf_init(void *dummy)
 {
@@ -396,6 +404,7 @@
        mbstat.sf_iocnt = 0;
        mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
 }
+SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
 
 /*
  * UMA backend page allocator for the jumbo frame zones.
Index: kern/subr_param.c
===================================================================
--- kern/subr_param.c   (revision 245423)
+++ kern/subr_param.c   (working copy)
@@ -93,7 +93,6 @@
 int    nbuf;
 int    ngroups_max;                    /* max # groups per process */
 int    nswbuf;
-quad_t maxmbufmem;                     /* max mbuf memory */
 pid_t  pid_max = PID_MAX;
 long   maxswzone;                      /* max swmeta KVA storage */
 long   maxbcache;                      /* max buffer cache KVA storage */
@@ -272,7 +271,6 @@
 void
 init_param2(long physpages)
 {
-       quad_t realmem;
 
        /* Base parameters */
        maxusers = MAXUSERS;
@@ -329,18 +327,6 @@
        TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
 
        /*
-        * The default limit for all mbuf related memory is 1/2 of all
-        * available kernel memory (physical or kmem).
-        * At most it can be 3/4 of available kernel memory.
-        */
-       realmem = qmin((quad_t)physpages * PAGE_SIZE,
-           VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS);
-       maxmbufmem = realmem / 2;
-       TUNABLE_QUAD_FETCH("kern.maxmbufmem", &maxmbufmem);
-       if (maxmbufmem > (realmem / 4) * 3)
-               maxmbufmem = (realmem / 4) * 3;
-
-       /*
         * The default for maxpipekva is min(1/64 of the kernel address space,
         * max(1/64 of main memory, 512KB)).  See sys_pipe.c for more details.
         */
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to