On Thu, Nov 16, 2017 at 08:13:39PM +0100, Gregor Best wrote:
> On Thu, Nov 16, 2017 at 11:13:04AM +1000, David Gwynne wrote:
> >
> > > On 16 Nov 2017, at 7:23 am, Gregor Best <[email protected]> wrote:
> > >
> > > Hi,
> > >
> > > On Mon, Nov 13, 2017 at 01:47:01PM +1000, David Gwynne wrote:
> > >> [...]
> > >> pools maintain count of how many times they failed to provide an
> > >> allocation. you can watch this with vmstat -m or systat pool.
> > >> however, we could use that to populate mb_drops too.
> > >> [...]
> > >
> > > That's certainly smarter than my idea.
> >
> > does it work though?
> >
>
> Now that I think about it, not quite, or at least not without reaching
> around into the pools innards to lock them while reading the statistics
> to get a consistent view.
currently pr_nfails is an unsigned long, which can be atomically
read on all our machines. it is a consistent view of the number.
however, i would like to make the stats uint64_t one day, so i agree
that it is better to lock to read them.
the diff below factors out the reading of the pool stats into a
struct kinfo_pool, which is used internally by pool and now by
sysctl_mbstat.
>
> The problem is that the MBSTAT_DROPS part of the `counters` data
> structure never gets modified, so we'd still need to loop over all pools
> in sysctl_mbstat, since this is not just about mbpool but also about the
> pools for payload data.
ok. the updated diff below loops over the cluster pools too now.
>
> On the other hand, going back to my initial proposal would mean
> essentially duplicating functionality the pools already provide, so
> that's at least a bit inelegant. Especially since it adds at least one
> splnet/splx dance.
>
> Another option would be to just say "screw it" and count the failed
> allocations without acquiring any locks on the pools, maybe via atomic
> operations. Sounds a bit too complicated though.
>
> At the moment, and after a night's sleep, I think my initial proposal is
> the most straightforward way to do this. That, or getting rid of the
> confusing counters in `netstat -m` that stay at 0 all the time...
getting rid of the stats does appeal to me too...
Index: sys/pool.h
===================================================================
RCS file: /cvs/src/sys/sys/pool.h,v
retrieving revision 1.74
diff -u -p -r1.74 pool.h
--- sys/pool.h 13 Aug 2017 20:26:33 -0000 1.74
+++ sys/pool.h 17 Nov 2017 00:58:13 -0000
@@ -259,6 +259,7 @@ void pool_init(struct pool *, size_t, u
const char *, struct pool_allocator *);
void pool_cache_init(struct pool *);
void pool_destroy(struct pool *);
+void pool_info(struct pool *, struct kinfo_pool *);
void pool_setlowat(struct pool *, int);
void pool_sethiwat(struct pool *, int);
int pool_sethardlimit(struct pool *, u_int, const char *, int);
Index: sys/sysctl.h
===================================================================
RCS file: /cvs/src/sys/sys/sysctl.h,v
retrieving revision 1.175
diff -u -p -r1.175 sysctl.h
--- sys/sysctl.h 12 Oct 2017 09:14:16 -0000 1.175
+++ sys/sysctl.h 17 Nov 2017 00:58:13 -0000
@@ -936,6 +936,7 @@ int sysctl_rdstruct(void *, size_t *, vo
int sysctl_struct(void *, size_t *, void *, size_t, void *, size_t);
int sysctl_file(int *, u_int, char *, size_t *, struct proc *);
int sysctl_doproc(int *, u_int, char *, size_t *);
+int sysctl_mbstat(int *, u_int, void *, size_t *, void *, size_t);
struct mbuf_queue;
int sysctl_mq(int *, u_int, void *, size_t *, void *, size_t,
struct mbuf_queue *);
Index: kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.330
diff -u -p -r1.330 kern_sysctl.c
--- kern/kern_sysctl.c 11 Aug 2017 21:24:19 -0000 1.330
+++ kern/kern_sysctl.c 17 Nov 2017 00:58:13 -0000
@@ -392,24 +392,9 @@ kern_sysctl(int *name, u_int namelen, vo
case KERN_FILE:
return (sysctl_file(name + 1, namelen - 1, oldp, oldlenp, p));
#endif
- case KERN_MBSTAT: {
- extern struct cpumem *mbstat;
- uint64_t counters[MBSTAT_COUNT];
- struct mbstat mbs;
- unsigned int i;
-
- memset(&mbs, 0, sizeof(mbs));
- counters_read(mbstat, counters, MBSTAT_COUNT);
- for (i = 0; i < MBSTAT_TYPES; i++)
- mbs.m_mtypes[i] = counters[i];
-
- mbs.m_drops = counters[MBSTAT_DROPS];
- mbs.m_wait = counters[MBSTAT_WAIT];
- mbs.m_drain = counters[MBSTAT_DRAIN];
-
- return (sysctl_rdstruct(oldp, oldlenp, newp,
- &mbs, sizeof(mbs)));
- }
+ case KERN_MBSTAT:
+ return (sysctl_mbstat(name + 1, namelen - 1, oldp, oldlenp,
+ newp, newlen));
#if defined(GPROF) || defined(DDBPROF)
case KERN_PROF:
return (sysctl_doprof(name + 1, namelen - 1, oldp, oldlenp,
Index: kern/uipc_mbuf.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.250
diff -u -p -r1.250 uipc_mbuf.c
--- kern/uipc_mbuf.c 12 Oct 2017 09:14:16 -0000 1.250
+++ kern/uipc_mbuf.c 17 Nov 2017 00:58:13 -0000
@@ -1421,6 +1421,39 @@ m_pool_init(struct pool *pp, u_int size,
pool_set_constraints(pp, &kp_dma_contig);
}
+/*
+ * sysctl_mbstat: export struct mbstat for KERN_MBSTAT (read-only).
+ *
+ * The per-CPU mbstat counters are folded into a single struct, then
+ * the allocation-failure counts (pr_nfail) of the mbuf and cluster
+ * pools are folded into m_drops: MBSTAT_DROPS is never incremented
+ * directly, so without this the drops column would always read 0,
+ * which is the whole point of this thread.
+ */
+int
+sysctl_mbstat(int *name, u_int namelen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	struct kinfo_pool pi;
+	uint64_t counters[MBSTAT_COUNT];
+	struct mbstat mbs;
+	unsigned int i;
+
+	if (namelen != 0)
+		return (ENOTDIR);
+
+	memset(&mbs, 0, sizeof(mbs));
+
+	counters_read(mbstat, counters, MBSTAT_COUNT);
+	for (i = 0; i < MBSTAT_TYPES; i++)
+		mbs.m_mtypes[i] = counters[i];
+
+	mbs.m_drops = counters[MBSTAT_DROPS];
+	mbs.m_wait = counters[MBSTAT_WAIT];
+	mbs.m_drain = counters[MBSTAT_DRAIN]; /* pool gcs? */
+
+	/* pool_get() failures are drops, not waits. */
+	pool_info(&mbpool, &pi);
+	mbs.m_drops += pi.pr_nfail;
+
+	for (i = 0; i < nitems(mclpools); i++) {
+		pool_info(&mclpools[i], &pi);
+		mbs.m_drops += pi.pr_nfail;
+	}
+
+	return (sysctl_rdstruct(oldp, oldlenp, newp, &mbs, sizeof(mbs)));
+}
+
#ifdef DDB
void
m_print(void *v,
Index: kern/subr_pool.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.220
diff -u -p -r1.220 subr_pool.c
--- kern/subr_pool.c 13 Aug 2017 20:26:33 -0000 1.220
+++ kern/subr_pool.c 17 Nov 2017 00:58:13 -0000
@@ -1442,6 +1442,29 @@ pool_walk(struct pool *pp, int full,
}
#endif
+/*
+ * pool_info: copy a pool's statistics into *pi under pr_lock, so the
+ * caller gets a consistent snapshot (pr_nfail et al. may become
+ * uint64_t one day, when a bare read would no longer be atomic).
+ *
+ * NOTE(review): *pi is not zeroed here and the cache fields are not
+ * filled in; callers exporting the whole struct must memset it and/or
+ * call pool_cache_pool_info() themselves, as sysctl_dopool does.
+ */
+void
+pool_info(struct pool *pp, struct kinfo_pool *pi)
+{
+	pl_enter(pp, &pp->pr_lock);
+	pi->pr_size = pp->pr_size;
+	pi->pr_pgsize = pp->pr_pgsize;
+	pi->pr_itemsperpage = pp->pr_itemsperpage;
+	pi->pr_npages = pp->pr_npages;
+	pi->pr_minpages = pp->pr_minpages;
+	pi->pr_maxpages = pp->pr_maxpages;
+	pi->pr_hardlimit = pp->pr_hardlimit;
+	pi->pr_nout = pp->pr_nout;
+	pi->pr_nitems = pp->pr_nitems;
+	pi->pr_nget = pp->pr_nget;
+	pi->pr_nput = pp->pr_nput;
+	pi->pr_nfail = pp->pr_nfail;
+	pi->pr_npagealloc = pp->pr_npagealloc;
+	pi->pr_npagefree = pp->pr_npagefree;
+	pi->pr_hiwat = pp->pr_hiwat;
+	pi->pr_nidle = pp->pr_nidle;
+	pl_leave(pp, &pp->pr_lock);
+}
+
/*
* We have three different sysctls.
* kern.pool.npools - the number of pools.
@@ -1490,25 +1513,7 @@ sysctl_dopool(int *name, u_int namelen,
case KERN_POOL_POOL:
memset(&pi, 0, sizeof(pi));
- pl_enter(pp, &pp->pr_lock);
- pi.pr_size = pp->pr_size;
- pi.pr_pgsize = pp->pr_pgsize;
- pi.pr_itemsperpage = pp->pr_itemsperpage;
- pi.pr_npages = pp->pr_npages;
- pi.pr_minpages = pp->pr_minpages;
- pi.pr_maxpages = pp->pr_maxpages;
- pi.pr_hardlimit = pp->pr_hardlimit;
- pi.pr_nout = pp->pr_nout;
- pi.pr_nitems = pp->pr_nitems;
- pi.pr_nget = pp->pr_nget;
- pi.pr_nput = pp->pr_nput;
- pi.pr_nfail = pp->pr_nfail;
- pi.pr_npagealloc = pp->pr_npagealloc;
- pi.pr_npagefree = pp->pr_npagefree;
- pi.pr_hiwat = pp->pr_hiwat;
- pi.pr_nidle = pp->pr_nidle;
- pl_leave(pp, &pp->pr_lock);
-
+ pool_info(pp, &pi);
pool_cache_pool_info(pp, &pi);
rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));