based on a conversation at the bar.
POOL_DEBUG is expensive. But we really want it because it finds bugs
before they hurt us. The solution to this is to make it simpler to
turn off.
This diff starts the kernel with pool debug on, but allows it to be
turned off with sysctl kern.pool_debug=0. This does not gaurantee
that all the pool pages will be unchecked, but it does help.
This will let people who care about performance turn it off permanently
in sysctl.conf; I think we will add a line there for people to know how
to use it.
Index: kern/subr_pool.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.100
diff -u -r1.100 subr_pool.c
--- kern/subr_pool.c 3 Apr 2011 22:07:37 -0000 1.100
+++ kern/subr_pool.c 3 Apr 2011 22:59:39 -0000
@@ -42,7 +42,7 @@
#include <sys/sysctl.h>
#include <uvm/uvm.h>
-
+#include <dev/rndvar.h>
/*
* Pool resource management utility.
@@ -74,6 +74,7 @@
caddr_t ph_page; /* this page's address */
caddr_t ph_colored; /* page's colored address */
int ph_pagesize;
+ int ph_magic;
};
struct pool_item {
@@ -89,6 +90,7 @@
#else
#define PI_MAGIC 0xdeafbeef
#endif
+int pool_debug = 1;
#define POOL_NEEDS_CATCHUP(pp)
\
((pp)->pr_nitems < (pp)->pr_minitems)
@@ -441,7 +443,8 @@
else
ph = pool_get(&phpool, (flags & ~(PR_WAITOK | PR_ZERO)) |
PR_NOWAIT);
-
+ if (pool_debug)
+ ph->ph_magic = PI_MAGIC;
return (ph);
}
@@ -611,13 +614,15 @@
"page %p; item addr %p; offset 0x%x=0x%x",
pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic);
#ifdef POOL_DEBUG
- for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
- i < pp->pr_size / sizeof(int); i++) {
- if (ip[i] != PI_MAGIC) {
- panic("pool_do_get(%s): free list modified: "
- "page %p; item addr %p; offset 0x%x=0x%x",
- pp->pr_wchan, ph->ph_page, pi,
- i * sizeof(int), ip[i]);
+ if (pool_debug && ph->ph_magic) {
+ for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
+ i < pp->pr_size / sizeof(int); i++) {
+ if (ip[i] != ph->ph_magic) {
+ panic("pool_do_get(%s): free list modified: "
+ "page %p; item addr %p; offset 0x%x=0x%x",
+ pp->pr_wchan, ph->ph_page, pi,
+ i * sizeof(int), ip[i]);
+ }
}
}
#endif /* POOL_DEBUG */
@@ -731,9 +736,11 @@
#ifdef DIAGNOSTIC
pi->pi_magic = PI_MAGIC;
#ifdef POOL_DEBUG
- for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
- i < pp->pr_size / sizeof(int); i++)
- ip[i] = PI_MAGIC;
+ if (ph->ph_magic) {
+ for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
+ i < pp->pr_size / sizeof(int); i++)
+ ip[i] = ph->ph_magic;
+ }
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */
@@ -886,9 +893,11 @@
#ifdef DIAGNOSTIC
pi->pi_magic = PI_MAGIC;
#ifdef POOL_DEBUG
- for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
- i < pp->pr_size / sizeof(int); i++)
- ip[i] = PI_MAGIC;
+ if (ph->ph_magic) {
+ for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
+ i < pp->pr_size / sizeof(int); i++)
+ ip[i] = ph->ph_magic;
+ }
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */
cp = (caddr_t)(cp + pp->pr_size);
@@ -1273,14 +1282,16 @@
0, pi->pi_magic);
}
#ifdef POOL_DEBUG
- for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
- i < pp->pr_size / sizeof(int); i++) {
- if (ip[i] != PI_MAGIC) {
- printf("pool(%s): free list modified: "
- "page %p; item ordinal %d; addr %p "
- "(p %p); offset 0x%x=0x%x\n",
- pp->pr_wchan, ph->ph_page, n, pi,
- page, i * sizeof(int), ip[i]);
+ if (pool_debug && ph->ph_magic) {
+ for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
+ i < pp->pr_size / sizeof(int); i++) {
+ if (ip[i] != ph->ph_magic) {
+ printf("pool(%s): free list modified: "
+ "page %p; item ordinal %d; addr %p "
+ "(p %p); offset 0x%x=0x%x\n",
+ pp->pr_wchan, ph->ph_page, n, pi,
+ page, i * sizeof(int), ip[i]);
+ }
}
}
Index: kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.199
diff -u -r1.199 kern_sysctl.c
--- kern/kern_sysctl.c 2 Apr 2011 16:47:17 -0000 1.199
+++ kern/kern_sysctl.c 3 Apr 2011 23:57:45 -0000
@@ -270,6 +270,7 @@
extern int cryptodevallowsoft;
#endif
extern int maxlocksperuid;
+ extern int pool_debug;
/* all sysctl names at this level are terminal except a ton of them */
if (namelen != 1) {
@@ -591,6 +592,14 @@
return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
case KERN_NETLIVELOCKS:
return (sysctl_rdint(oldp, oldlenp, newp, mcllivelocks));
+ case KERN_POOL_DEBUG:
+ error = sysctl_int(oldp, oldlenp, newp, newlen,
+ &pool_debug);
+ if (error == 0) {
+ pool_reclaim_all();
+ /* XXX How about a second reclaim in a few seconds? */
+ }
+ return (error);
default:
return (EOPNOTSUPP);
}
Index: sys/sysctl.h
===================================================================
RCS file: /cvs/src/sys/sys/sysctl.h,v
retrieving revision 1.109
diff -u -r1.109 sysctl.h
--- sys/sysctl.h 12 Mar 2011 04:54:28 -0000 1.109
+++ sys/sysctl.h 3 Apr 2011 22:41:20 -0000
@@ -188,7 +188,8 @@
#define KERN_RTHREADS 74 /* kernel rthreads support
enabled */
#define KERN_CONSDEV 75 /* dev_t: console terminal
device */
#define KERN_NETLIVELOCKS 76 /* int: number of network
livelocks */
-#define KERN_MAXID 77 /* number of valid kern ids */
+#define KERN_POOL_DEBUG 77 /* int: enable pool_debug */
+#define KERN_MAXID 78 /* number of valid kern ids */
#define CTL_KERN_NAMES { \
{ 0, 0 }, \
@@ -268,6 +269,7 @@
{ "rthreads", CTLTYPE_INT }, \
{ "consdev", CTLTYPE_STRUCT }, \
{ "netlivelocks", CTLTYPE_INT }, \
+ { "pool_debug", CTLTYPE_INT }, \
}
/*