On Thu, Nov 16, 2017 at 08:13:39PM +0100, Gregor Best wrote:
> On Thu, Nov 16, 2017 at 11:13:04AM +1000, David Gwynne wrote:
> >
> > > On 16 Nov 2017, at 7:23 am, Gregor Best <[email protected]> wrote:
> > >
> > > Hi,
> > >
> > > On Mon, Nov 13, 2017 at 01:47:01PM +1000, David Gwynne wrote:
> > >> [...]
> > >> pools maintain count of how many times they failed to provide an
> > >> allocation. you can watch this with vmstat -m or systat pool.
> > >> however, we could use that to populate mb_drops too.
> > >> [...]
> > >
> > > That's certainly smarter than my idea.
> >
> > does it work though?
> >
>
> Now that I think about it, not quite, or at least not without reaching
> around into the pools innards to lock them while reading the statistics
> to get a consistent view.
currently pr_nfails is an unsigned long, which can be atomically
read on all our machines. it is a consistent view of the number.
however, i would like to make the stats uint64_t one day, so i agree
that it is better to lock to read them.
the diff below factors out the reading of the pool stats into a
struct kinfo_pool, which is used internally by pool and now by
sysctl_mbstat.
>
> The problem is that the MBSTAT_DROPS part of the `counters` data
> structure never gets modified, so we'd still need to loop over all pools
> in sysctl_mbstat, since this is not just about mbpool but also about the
> pools for payload data.
ok. the updated diff below loops over the cluster pools too now.
>
> On the other hand, going back to my initial proposal would mean
> essentially duplicating functionality the pools already provide, so
> that's at least a bit inelegant. Especially since it adds at least one
> splnet/splx dance.
>
> Another option would be to just say "screw it" and count the failed
> allocations without acquiring any locks on the pools, maybe via atomic
> operations. Sounds a bit too complicated though.
>
> At the moment, and after a night's sleep, I think my initial proposal is
> the most straightforward way to do this. That, or getting rid of the
> confusing counters in `netstat -m` that stay at 0 all the time...
getting rid of the stats does appeal to me too...
Index: sys/pool.h
===================================================================
RCS file: /cvs/src/sys/sys/pool.h,v
retrieving revision 1.74
diff -u -p -r1.74 pool.h
--- sys/pool.h 13 Aug 2017 20:26:33 -0000 1.74
+++ sys/pool.h 17 Nov 2017 00:58:13 -0000
@@ -259,6 +259,7 @@ void pool_init(struct pool *, size_t, u
const char *, struct pool_allocator *);
void pool_cache_init(struct pool *);
void pool_destroy(struct pool *);
+void pool_info(struct pool *, struct kinfo_pool *);
void pool_setlowat(struct pool *, int);
void pool_sethiwat(struct pool *, int);
int pool_sethardlimit(struct pool *, u_int, const char *, int);
Index: sys/sysctl.h
===================================================================
RCS file: /cvs/src/sys/sys/sysctl.h,v
retrieving revision 1.175
diff -u -p -r1.175 sysctl.h
--- sys/sysctl.h 12 Oct 2017 09:14:16 -0000 1.175
+++ sys/sysctl.h 17 Nov 2017 00:58:13 -0000
@@ -936,6 +936,7 @@ int sysctl_rdstruct(void *, size_t *, vo
int sysctl_struct(void *, size_t *, void *, size_t, void *, size_t);
int sysctl_file(int *, u_int, char *, size_t *, struct proc *);
int sysctl_doproc(int *, u_int, char *, size_t *);
+int sysctl_mbstat(int *, u_int, void *, size_t *, void *, size_t);
struct mbuf_queue;
int sysctl_mq(int *, u_int, void *, size_t *, void *, size_t,
struct mbuf_queue *);
Index: kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.330
diff -u -p -r1.330 kern_sysctl.c
--- kern/kern_sysctl.c 11 Aug 2017 21:24:19 -0000 1.330
+++ kern/kern_sysctl.c 17 Nov 2017 00:58:13 -0000
@@ -392,24 +392,9 @@ kern_sysctl(int *name, u_int namelen, vo
case KERN_FILE:
return (sysctl_file(name + 1, namelen - 1, oldp, oldlenp, p));
#endif
- case KERN_MBSTAT: {
- extern struct cpumem *mbstat;
- uint64_t counters[MBSTAT_COUNT];
- struct mbstat mbs;
- unsigned int i;
-
- memset(&mbs, 0, sizeof(mbs));
- counters_read(mbstat, counters, MBSTAT_COUNT);
- for (i = 0; i < MBSTAT_TYPES; i++)
- mbs.m_mtypes[i] = counters[i];
-
- mbs.m_drops = counters[MBSTAT_DROPS];
- mbs.m_wait = counters[MBSTAT_WAIT];
- mbs.m_drain = counters[MBSTAT_DRAIN];
-
- return (sysctl_rdstruct(oldp, oldlenp, newp,
- &mbs, sizeof(mbs)));
- }
+ case KERN_MBSTAT:
+ return (sysctl_mbstat(name + 1, namelen - 1, oldp, oldlenp,
+ newp, newlen));
#if defined(GPROF) || defined(DDBPROF)
case KERN_PROF:
return (sysctl_doprof(name + 1, namelen - 1, oldp, oldlenp,
Index: kern/uipc_mbuf.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.250
diff -u -p -r1.250 uipc_mbuf.c
--- kern/uipc_mbuf.c 12 Oct 2017 09:14:16 -0000 1.250
+++ kern/uipc_mbuf.c 17 Nov 2017 00:58:13 -0000
@@ -1421,6 +1421,39 @@ m_pool_init(struct pool *pp, u_int size,
pool_set_constraints(pp, &kp_dma_contig);
}
+/*
+ * sysctl_mbstat: export struct mbstat for KERN_MBSTAT (read-only).
+ *
+ * The per-CPU mbstat counters are folded into a single struct, then
+ * the allocation-failure counts (pr_nfail) of the mbuf and cluster
+ * pools are folded into m_drops: MBSTAT_DROPS is never incremented
+ * directly, so without this the drops column would always read 0,
+ * which is the whole point of this thread.
+ */
+int
+sysctl_mbstat(int *name, u_int namelen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	struct kinfo_pool pi;
+	uint64_t counters[MBSTAT_COUNT];
+	struct mbstat mbs;
+	unsigned int i;
+
+	if (namelen != 0)
+		return (ENOTDIR);
+
+	memset(&mbs, 0, sizeof(mbs));
+
+	counters_read(mbstat, counters, MBSTAT_COUNT);
+	for (i = 0; i < MBSTAT_TYPES; i++)
+		mbs.m_mtypes[i] = counters[i];
+
+	mbs.m_drops = counters[MBSTAT_DROPS];
+	mbs.m_wait = counters[MBSTAT_WAIT];
+	mbs.m_drain = counters[MBSTAT_DRAIN]; /* pool gcs? */
+
+	/* pool_get() failures are drops, not waits. */
+	pool_info(&mbpool, &pi);
+	mbs.m_drops += pi.pr_nfail;
+
+	for (i = 0; i < nitems(mclpools); i++) {
+		pool_info(&mclpools[i], &pi);
+		mbs.m_drops += pi.pr_nfail;
+	}
+
+	return (sysctl_rdstruct(oldp, oldlenp, newp, &mbs, sizeof(mbs)));
+}
+
#ifdef DDB
void
m_print(void *v,
Index: kern/subr_pool.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.220
diff -u -p -r1.220 subr_pool.c
--- kern/subr_pool.c 13 Aug 2017 20:26:33 -0000 1.220
+++ kern/subr_pool.c 17 Nov 2017 00:58:13 -0000
@@ -1442,6 +1442,29 @@ pool_walk(struct pool *pp, int full,
}
#endif
+/*
+ * pool_info: copy a pool's statistics into *pi under pr_lock, so the
+ * caller gets a consistent snapshot (pr_nfail et al. may become
+ * uint64_t one day, when a bare read would no longer be atomic).
+ *
+ * NOTE(review): *pi is not zeroed here and the cache fields are not
+ * filled in; callers exporting the whole struct must memset it and/or
+ * call pool_cache_pool_info() themselves, as sysctl_dopool does.
+ */
+void
+pool_info(struct pool *pp, struct kinfo_pool *pi)
+{
+	pl_enter(pp, &pp->pr_lock);
+	pi->pr_size = pp->pr_size;
+	pi->pr_pgsize = pp->pr_pgsize;
+	pi->pr_itemsperpage = pp->pr_itemsperpage;
+	pi->pr_npages = pp->pr_npages;
+	pi->pr_minpages = pp->pr_minpages;
+	pi->pr_maxpages = pp->pr_maxpages;
+	pi->pr_hardlimit = pp->pr_hardlimit;
+	pi->pr_nout = pp->pr_nout;
+	pi->pr_nitems = pp->pr_nitems;
+	pi->pr_nget = pp->pr_nget;
+	pi->pr_nput = pp->pr_nput;
+	pi->pr_nfail = pp->pr_nfail;
+	pi->pr_npagealloc = pp->pr_npagealloc;
+	pi->pr_npagefree = pp->pr_npagefree;
+	pi->pr_hiwat = pp->pr_hiwat;
+	pi->pr_nidle = pp->pr_nidle;
+	pl_leave(pp, &pp->pr_lock);
+}
+
/*
* We have three different sysctls.
* kern.pool.npools - the number of pools.
@@ -1490,25 +1513,7 @@ sysctl_dopool(int *name, u_int namelen,
case KERN_POOL_POOL:
memset(&pi, 0, sizeof(pi));
- pl_enter(pp, &pp->pr_lock);
- pi.pr_size = pp->pr_size;
- pi.pr_pgsize = pp->pr_pgsize;
- pi.pr_itemsperpage = pp->pr_itemsperpage;
- pi.pr_npages = pp->pr_npages;
- pi.pr_minpages = pp->pr_minpages;
- pi.pr_maxpages = pp->pr_maxpages;
- pi.pr_hardlimit = pp->pr_hardlimit;
- pi.pr_nout = pp->pr_nout;
- pi.pr_nitems = pp->pr_nitems;
- pi.pr_nget = pp->pr_nget;
- pi.pr_nput = pp->pr_nput;
- pi.pr_nfail = pp->pr_nfail;
- pi.pr_npagealloc = pp->pr_npagealloc;
- pi.pr_npagefree = pp->pr_npagefree;
- pi.pr_hiwat = pp->pr_hiwat;
- pi.pr_nidle = pp->pr_nidle;
- pl_leave(pp, &pp->pr_lock);
-
+ pool_info(pp, &pi);
pool_cache_pool_info(pp, &pi);
rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));