I'm pretty sure that the "realmem" calculation is going to overflow on
i386/PAE, where the number of bytes of physical memory can exceed what
the type long can represent: "physpages * PAGE_SIZE" is evaluated as a
long before lmin() ever sees it.

On 11/27/2012 15:19, Andre Oppermann wrote:
> Author: andre
> Date: Tue Nov 27 21:19:58 2012
> New Revision: 243631
> URL: http://svnweb.freebsd.org/changeset/base/243631
>
> Log:
>   Base the mbuf related limits on the available physical memory or
>   kernel memory, whichever is lower.  The overall mbuf related memory
>   limit must be set so that mbufs (and clusters of various sizes)
>   can't exhaust physical RAM or KVM.
>   
>   The limit is set to half of the physical RAM or KVM (whichever is
>   lower) as the baseline.  In any normal scenario we want to leave
>   at least half of the physmem/kvm for other kernel functions and
>   userspace to prevent it from swapping too easily.  Via a tunable
>   kern.maxmbufmem the limit can be upped to at most 3/4 of physmem/kvm.
>   
>   At the same time divorce maxfiles from maxusers and set maxfiles to
>   physpages / 8 with a floor based on maxusers.  This way busy servers
>   can make use of the significantly increased mbuf limits with a much
>   larger number of open sockets.
>   
>   Tidy up ordering in init_param2() and check up on some users of
>   those values calculated here.
>   
>   Out of the overall mbuf memory limit, 2K clusters and 4K (page size)
>   clusters get 1/4 each because these are the most heavily used mbuf
>   sizes.  2K clusters are used for MTU 1500 ethernet inbound packets.
>   4K clusters are used whenever possible for sends on sockets and thus
>   outbound packets.  The larger cluster sizes of 9K and 16K are limited
>   to 1/6 each of the overall mbuf memory limit.  When jumbo MTUs are used
>   these large clusters will end up only on the inbound path.  They are
>   not used on outbound, there it's still 4K.  Yes, that will stay that
>   way because otherwise we run into lots of complications in the
>   stack.  And it really isn't a problem, so don't make a scene.
>   
>   Normal mbufs (256B) weren't limited at all previously.  This was
>   problematic as there are certain places in the kernel that on
>   allocation failure of clusters try to piece together their packet
>   from smaller mbufs.
>   
>   The mbuf limit is the number of all other mbuf sizes together plus
>   some more to allow for standalone mbufs (ACK for example) and to
>   send off a copy of a cluster.  Unfortunately there isn't a way to
>   set an overall limit for all mbuf memory together as UMA doesn't
>   support such a limit.
>   
>   NB: Every cluster also has an mbuf associated with it.
>   
>   Two examples on the revised mbuf sizing limits:
>   
>   1GB KVM:
>    512MB limit for mbufs
>    419,430 mbufs
>     65,536 2K mbuf clusters
>     32,768 4K mbuf clusters
>      9,709 9K mbuf clusters
>      5,461 16K mbuf clusters
>   
>   16GB RAM:
>    8GB limit for mbufs
>    33,554,432 mbufs
>     1,048,576 2K mbuf clusters
>       524,288 4K mbuf clusters
>       155,344 9K mbuf clusters
>        87,381 16K mbuf clusters
>   
>   These defaults should be sufficient for even the most demanding
>   network loads.
>   
>   MFC after:  1 month
>
> Modified:
>   head/sys/kern/kern_mbuf.c
>   head/sys/kern/subr_param.c
>   head/sys/kern/uipc_socket.c
>   head/sys/sys/eventhandler.h
>   head/sys/sys/mbuf.h
>
> Modified: head/sys/kern/kern_mbuf.c
> ==============================================================================
> --- head/sys/kern/kern_mbuf.c Tue Nov 27 20:22:36 2012        (r243630)
> +++ head/sys/kern/kern_mbuf.c Tue Nov 27 21:19:58 2012        (r243631)
> @@ -96,6 +96,7 @@ __FBSDID("$FreeBSD$");
>   *
>   */
>  
> +int nmbufs;                  /* limits number of mbufs */
>  int nmbclusters;             /* limits number of mbuf clusters */
>  int nmbjumbop;                       /* limits number of page size jumbo 
> clusters */
>  int nmbjumbo9;                       /* limits number of 9k jumbo clusters */
> @@ -147,9 +148,11 @@ sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
>       newnmbclusters = nmbclusters;
>       error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); 
>       if (error == 0 && req->newptr) {
> -             if (newnmbclusters > nmbclusters) {
> +             if (newnmbclusters > nmbclusters &&
> +                 nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 
> {
>                       nmbclusters = newnmbclusters;
>                       uma_zone_set_max(zone_clust, nmbclusters);
> +                     nmbclusters = uma_zone_get_max(zone_clust);
>                       EVENTHANDLER_INVOKE(nmbclusters_change);
>               } else
>                       error = EINVAL;
> @@ -168,9 +171,11 @@ sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
>       newnmbjumbop = nmbjumbop;
>       error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); 
>       if (error == 0 && req->newptr) {
> -             if (newnmbjumbop> nmbjumbop) {
> +             if (newnmbjumbop > nmbjumbop &&
> +                 nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 
> {
>                       nmbjumbop = newnmbjumbop;
>                       uma_zone_set_max(zone_jumbop, nmbjumbop);
> +                     nmbjumbop = uma_zone_get_max(zone_jumbop);
>               } else
>                       error = EINVAL;
>       }
> @@ -189,9 +194,11 @@ sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
>       newnmbjumbo9 = nmbjumbo9;
>       error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); 
>       if (error == 0 && req->newptr) {
> -             if (newnmbjumbo9> nmbjumbo9) {
> +             if (newnmbjumbo9 > nmbjumbo9&&
> +                 nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 
> {
>                       nmbjumbo9 = newnmbjumbo9;
>                       uma_zone_set_max(zone_jumbo9, nmbjumbo9);
> +                     nmbjumbo9 = uma_zone_get_max(zone_jumbo9);
>               } else
>                       error = EINVAL;
>       }
> @@ -209,9 +216,11 @@ sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
>       newnmbjumbo16 = nmbjumbo16;
>       error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); 
>       if (error == 0 && req->newptr) {
> -             if (newnmbjumbo16> nmbjumbo16) {
> +             if (newnmbjumbo16 > nmbjumbo16 &&
> +                 nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 
> {
>                       nmbjumbo16 = newnmbjumbo16;
>                       uma_zone_set_max(zone_jumbo16, nmbjumbo16);
> +                     nmbjumbo16 = uma_zone_get_max(zone_jumbo16);
>               } else
>                       error = EINVAL;
>       }
> @@ -221,6 +230,27 @@ SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumb
>  &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
>      "Maximum number of mbuf 16k jumbo clusters allowed");
>  
> +static int
> +sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
> +{
> +     int error, newnmbufs;
> +
> +     newnmbufs = nmbufs;
> +     error = sysctl_handle_int(oidp, &newnmbufs, 0, req); 
> +     if (error == 0 && req->newptr) {
> +             if (newnmbufs > nmbufs) {
> +                     nmbufs = newnmbufs;
> +                     uma_zone_set_max(zone_mbuf, nmbufs);
> +                     nmbclusters = uma_zone_get_max(zone_mbuf);
> +                     EVENTHANDLER_INVOKE(nmbufs_change);
> +             } else
> +                     error = EINVAL;
> +     }
> +     return (error);
> +}
> +SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbuf, CTLTYPE_INT|CTLFLAG_RW,
> +&nmbufs, 0, sysctl_nmbufs, "IU",
> +    "Maximum number of mbufs allowed");
>  
>  
>  SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
> @@ -275,6 +305,10 @@ mbuf_init(void *dummy)
>           NULL, NULL,
>  #endif
>           MSIZE - 1, UMA_ZONE_MAXBUCKET);
> +     if (nmbufs > 0) {
> +             uma_zone_set_max(zone_mbuf, nmbufs);
> +             nmbufs = uma_zone_get_max(zone_mbuf);
> +     }
>  
>       zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
>           mb_ctor_clust, mb_dtor_clust,
> @@ -284,8 +318,10 @@ mbuf_init(void *dummy)
>           NULL, NULL,
>  #endif
>           UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
> -     if (nmbclusters > 0)
> +     if (nmbclusters > 0) {
>               uma_zone_set_max(zone_clust, nmbclusters);
> +             nmbclusters = uma_zone_get_max(zone_clust);
> +     }
>  
>       zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
>           mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
> @@ -299,8 +335,10 @@ mbuf_init(void *dummy)
>           NULL, NULL,
>  #endif
>           UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
> -     if (nmbjumbop > 0)
> +     if (nmbjumbop > 0) {
>               uma_zone_set_max(zone_jumbop, nmbjumbop);
> +             nmbjumbop = uma_zone_get_max(zone_jumbop);
> +     }
>  
>       zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
>           mb_ctor_clust, mb_dtor_clust,
> @@ -310,9 +348,11 @@ mbuf_init(void *dummy)
>           NULL, NULL,
>  #endif
>           UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
> -     if (nmbjumbo9 > 0)
> -             uma_zone_set_max(zone_jumbo9, nmbjumbo9);
>       uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
> +     if (nmbjumbo9 > 0) {
> +             uma_zone_set_max(zone_jumbo9, nmbjumbo9);
> +             nmbjumbo9 = uma_zone_get_max(zone_jumbo9);
> +     }
>  
>       zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
>           mb_ctor_clust, mb_dtor_clust,
> @@ -322,9 +362,11 @@ mbuf_init(void *dummy)
>           NULL, NULL,
>  #endif
>           UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
> -     if (nmbjumbo16 > 0)
> -             uma_zone_set_max(zone_jumbo16, nmbjumbo16);
>       uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
> +     if (nmbjumbo16 > 0) {
> +             uma_zone_set_max(zone_jumbo16, nmbjumbo16);
> +             nmbjumbo16 = uma_zone_get_max(zone_jumbo16);
> +     }
>  
>       zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
>           NULL, NULL,
>
> Modified: head/sys/kern/subr_param.c
> ==============================================================================
> --- head/sys/kern/subr_param.c        Tue Nov 27 20:22:36 2012        
> (r243630)
> +++ head/sys/kern/subr_param.c        Tue Nov 27 21:19:58 2012        
> (r243631)
> @@ -93,6 +93,7 @@ int ncallout;                       /* maximum # of timer ev
>  int  nbuf;
>  int  ngroups_max;                    /* max # groups per process */
>  int  nswbuf;
> +long maxmbufmem;                     /* max mbuf memory */
>  pid_t        pid_max = PID_MAX;
>  long maxswzone;                      /* max swmeta KVA storage */
>  long maxbcache;                      /* max buffer cache KVA storage */
> @@ -270,6 +271,7 @@ init_param1(void)
>  void
>  init_param2(long physpages)
>  {
> +     long realmem;
>  
>       /* Base parameters */
>       maxusers = MAXUSERS;
> @@ -293,19 +295,25 @@ init_param2(long physpages)
>       /*
>        * The following can be overridden after boot via sysctl.  Note:
>        * unless overriden, these macros are ultimately based on maxusers.
> -      */
> -     maxproc = NPROC;
> -     TUNABLE_INT_FETCH("kern.maxproc", &maxproc);
> -     /*
>        * Limit maxproc so that kmap entries cannot be exhausted by
>        * processes.
>        */
> +     maxproc = NPROC;
> +     TUNABLE_INT_FETCH("kern.maxproc", &maxproc);
>       if (maxproc > (physpages / 12))
>               maxproc = physpages / 12;
> -     maxfiles = MAXFILES;
> -     TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles);
>       maxprocperuid = (maxproc * 9) / 10;
> -     maxfilesperproc = (maxfiles * 9) / 10;
> +
> +     /*
> +      * The default limit for maxfiles is 1/12 of the number of
> +      * physical page but not less than 16 times maxusers.
> +      * At most it can be 1/6 the number of physical pages.
> +      */
> +     maxfiles = imax(MAXFILES, physpages / 8);
> +     TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles);
> +     if (maxfiles > (physpages / 4))
> +             maxfiles = physpages / 4;
> +     maxfilesperproc = (maxfiles / 10) * 9;
>       
>       /*
>        * Cannot be changed after boot.
> @@ -313,20 +321,35 @@ init_param2(long physpages)
>       nbuf = NBUF;
>       TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
>  
> +     /*
> +      * XXX: Does the callout wheel have to be so big?
> +      */
>       ncallout = 16 + maxproc + maxfiles;
>       TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
>  
>       /*
> +      * The default limit for all mbuf related memory is 1/2 of all
> +      * available kernel memory (physical or kmem).
> +      * At most it can be 3/4 of available kernel memory.
> +      */
> +     realmem = lmin(physpages * PAGE_SIZE,
> +                     VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS);
> +     maxmbufmem = realmem / 2;
> +     TUNABLE_LONG_FETCH("kern.maxmbufmem", &maxmbufmem);
> +     if (maxmbufmem > (realmem / 4) * 3)
> +             maxmbufmem = (realmem / 4) * 3;
> +
> +     /*
>        * The default for maxpipekva is min(1/64 of the kernel address space,
>        * max(1/64 of main memory, 512KB)).  See sys_pipe.c for more details.
>        */
>       maxpipekva = (physpages / 64) * PAGE_SIZE;
> +     TUNABLE_LONG_FETCH("kern.ipc.maxpipekva", &maxpipekva);
>       if (maxpipekva < 512 * 1024)
>               maxpipekva = 512 * 1024;
>       if (maxpipekva > (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 64)
>               maxpipekva = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) /
>                   64;
> -     TUNABLE_LONG_FETCH("kern.ipc.maxpipekva", &maxpipekva);
>  }
>  
>  /*
>
> Modified: head/sys/kern/uipc_socket.c
> ==============================================================================
> --- head/sys/kern/uipc_socket.c       Tue Nov 27 20:22:36 2012        
> (r243630)
> +++ head/sys/kern/uipc_socket.c       Tue Nov 27 21:19:58 2012        
> (r243631)
> @@ -290,7 +290,7 @@ init_maxsockets(void *ignored)
>  {
>  
>       TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
> -     maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
> +     maxsockets = imax(maxsockets, maxfiles);
>  }
>  SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
>  
> @@ -306,12 +306,9 @@ sysctl_maxsockets(SYSCTL_HANDLER_ARGS)
>       newmaxsockets = maxsockets;
>       error = sysctl_handle_int(oidp, &newmaxsockets, 0, req);
>       if (error == 0 && req->newptr) {
> -             if (newmaxsockets > maxsockets) {
> +             if (newmaxsockets > maxsockets &&
> +                 newmaxsockets <= maxfiles) {
>                       maxsockets = newmaxsockets;
> -                     if (maxsockets > ((maxfiles / 4) * 3)) {
> -                             maxfiles = (maxsockets * 5) / 4;
> -                             maxfilesperproc = (maxfiles * 9) / 10;
> -                     }
>                       EVENTHANDLER_INVOKE(maxsockets_change);
>               } else
>                       error = EINVAL;
>
> Modified: head/sys/sys/eventhandler.h
> ==============================================================================
> --- head/sys/sys/eventhandler.h       Tue Nov 27 20:22:36 2012        
> (r243630)
> +++ head/sys/sys/eventhandler.h       Tue Nov 27 21:19:58 2012        
> (r243631)
> @@ -253,6 +253,7 @@ EVENTHANDLER_DECLARE(thread_fini, thread
>  
>  typedef void (*uma_zone_chfn)(void *);
>  EVENTHANDLER_DECLARE(nmbclusters_change, uma_zone_chfn);
> +EVENTHANDLER_DECLARE(nmbufs_change, uma_zone_chfn);
>  EVENTHANDLER_DECLARE(maxsockets_change, uma_zone_chfn);
>  
>  #endif /* SYS_EVENTHANDLER_H */
>
> Modified: head/sys/sys/mbuf.h
> ==============================================================================
> --- head/sys/sys/mbuf.h       Tue Nov 27 20:22:36 2012        (r243630)
> +++ head/sys/sys/mbuf.h       Tue Nov 27 21:19:58 2012        (r243631)
> @@ -395,7 +395,7 @@ struct mbstat {
>   *
>   * The rest of it is defined in kern/kern_mbuf.c
>   */
> -
> +extern long          maxmbufmem;
>  extern uma_zone_t    zone_mbuf;
>  extern uma_zone_t    zone_clust;
>  extern uma_zone_t    zone_pack;
>

_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"

Reply via email to