DTrace has an unused logging facility built-in. The logging is intended to be safe to be called from the handler of a DTrace probe (what DTrace calls "probe context"). Because a DTrace probe could be enabled almost anywhere in the kernel, this means that it can't use standard FreeBSD synchronization primitives, and so it has rolled its own spin lock. As so often happens when rolling your own synchronization, there are several bugs here:
1) It doesn't use spinlock_enter/spinlock_exit, which means that a thread holding a spin lock can be switched out. A subsequent thread could be starved trying to acquire the lock (worse, there's the chance of a nasty case of priority inversion here, where a low-priority thread holds the lock but cannot run because a high-priority thread is spinning on that same lock). This is the bug that caused me to start looking into this code: I saw a situation where a user thread had taken the lock and subsequently been switched out, and a softclock thread starved waiting for the spin lock. 2) The code uses per-CPU buffers and thus has a lock for every CPU. It ends up doing the following in many cases: dtrace_debug_lock(curcpu); //... dtrace_debug_unlock(curcpu); There is nothing that guarantees that the thread can not have migrated to a new CPU in the meantime. 3) The locks do not use acquire and release memory barriers. 4) The locks are all right next to each other in memory, so false sharing will defeat much of the purpose of having per-CPU buffers in the first place. 5) The locks do not record their owner, make it difficult to debug problems with them. (There is a sixth problem, namely that this code is compiled into the kernel when nothing in the tree uses this log facility, but there is still some code paths which check for log messages. That's why I saw my crash in #1 in the first place. I plan on taking it out in a separate commit.) The following patch addresses these issues. I've tried stress-testing DTrace but I've been unable to reproduce my original crash on head. Any comments or objections? Index: sys/cddl/dev/dtrace/dtrace_debug.c =================================================================== --- sys/cddl/dev/dtrace/dtrace_debug.c (revision 243795) +++ sys/cddl/dev/dtrace/dtrace_debug.c (working copy) @@ -39,6 +39,7 @@ struct dtrace_debug_data { char bufr[DTRACE_DEBUG_BUFR_SIZE]; + uintptr_t lock; char *first; char *last; char *next; @@ -46,12 +47,14 @@ static char dtrace_debug_bufr[DTRACE_DEBUG_BUFR_SIZE]; -static volatile u_long dtrace_debug_flag[MAXCPU]; - static void dtrace_debug_lock(int cpu) { - while (dtrace_cmpset_long(&dtrace_debug_flag[cpu], 0, 1) == 0) + uintptr_t tid; + + tid = (uintptr_t)curthread; + spinlock_enter(); + while (atomic_cmpset_acq_long(&dtrace_debug_data[cpu].lock, 0, tid) == 0) /* Loop until the lock is obtained. */ ; } @@ -59,7 +62,8 @@ static void dtrace_debug_unlock(int cpu) { - dtrace_debug_flag[cpu] = 0; + atomic_store_rel_long(&dtrace_debug_data[cpu].lock, 0); + spinlock_exit(); } static void @@ -151,10 +155,11 @@ */ static __inline void -dtrace_debug__putc(char c) +dtrace_debug__putc(int cpu, char c) { - struct dtrace_debug_data *d = &dtrace_debug_data[curcpu]; + struct dtrace_debug_data *d; + d = &dtrace_debug_data[cpu]; *d->next++ = c; if (d->next == d->last) @@ -172,24 +177,30 @@ static void __used dtrace_debug_putc(char c) { - dtrace_debug_lock(curcpu); + int cpu; - dtrace_debug__putc(c); + cpu = curcpu; + dtrace_debug_lock(cpu); - dtrace_debug_unlock(curcpu); + dtrace_debug__putc(cpu, c); + + dtrace_debug_unlock(cpu); } static void __used dtrace_debug_puts(const char *s) { - dtrace_debug_lock(curcpu); + int cpu; + cpu = curcpu; + dtrace_debug_lock(cpu); + while (*s != '\0') - dtrace_debug__putc(*s++); + dtrace_debug__putc(cpu, *s++); - dtrace_debug__putc('\0'); + dtrace_debug__putc(cpu, '\0'); - dtrace_debug_unlock(curcpu); + dtrace_debug_unlock(cpu); } /* @@ -219,7 +230,7 @@ #define MAXNBUF (sizeof(intmax_t) * NBBY + 1) static void -dtrace_debug_vprintf(const char *fmt, va_list ap) +dtrace_debug_vprintf(int cpu, const char *fmt, va_list ap) { char nbuf[MAXNBUF]; const char *p, *percent, *q; @@ -243,10 +254,10 @@ width = 0; while ((ch = (u_char)*fmt++) != '%' || stop) { if (ch == '\0') { - dtrace_debug__putc('\0'); + dtrace_debug__putc(cpu, '\0'); return; } - dtrace_debug__putc(ch); + dtrace_debug__putc(cpu, ch); } percent = fmt - 1; qflag = 0; lflag = 0; ladjust = 0; sharpflag = 0; neg = 0; @@ -266,7 +277,7 @@ ladjust = 1; goto reswitch; case '%': - dtrace_debug__putc(ch); + dtrace_debug__putc(cpu, ch); break; case '*': if (!dot) { @@ -301,7 +312,7 @@ num = (u_int)va_arg(ap, int); p = va_arg(ap, char *); for (q = dtrace_debug_ksprintn(nbuf, num, *p++, NULL, 0); *q;) - dtrace_debug__putc(*q--); + dtrace_debug__putc(cpu, *q--); if (num == 0) break; @@ -309,19 +320,20 @@ for (tmp = 0; *p;) { n = *p++; if (num & (1 << (n - 1))) { - dtrace_debug__putc(tmp ? ',' : '<'); + dtrace_debug__putc(cpu, + tmp ? ',' : '<'); for (; (n = *p) > ' '; ++p) - dtrace_debug__putc(n); + dtrace_debug__putc(cpu, n); tmp = 1; } else for (; *p > ' '; ++p) continue; } if (tmp) - dtrace_debug__putc('>'); + dtrace_debug__putc(cpu, '>'); break; case 'c': - dtrace_debug__putc(va_arg(ap, int)); + dtrace_debug__putc(cpu, va_arg(ap, int)); break; case 'D': up = va_arg(ap, u_char *); @@ -329,12 +341,12 @@ if (!width) width = 16; while(width--) { - dtrace_debug__putc(hex2ascii(*up >> 4)); - dtrace_debug__putc(hex2ascii(*up & 0x0f)); + dtrace_debug__putc(cpu, hex2ascii(*up >> 4)); + dtrace_debug__putc(cpu, hex2ascii(*up & 0x0f)); up++; if (width) for (q=p;*q;q++) - dtrace_debug__putc(*q); + dtrace_debug__putc(cpu, *q); } break; case 'd': @@ -406,12 +418,12 @@ if (!ladjust && width > 0) while (width--) - dtrace_debug__putc(padc); + dtrace_debug__putc(cpu, padc); while (n--) - dtrace_debug__putc(*p++); + dtrace_debug__putc(cpu, *p++); if (ladjust && width > 0) while (width--) - dtrace_debug__putc(padc); + dtrace_debug__putc(cpu, padc); break; case 't': tflag = 1; @@ -485,32 +497,32 @@ if (!ladjust && padc != '0' && width && (width -= tmp) > 0) while (width--) - dtrace_debug__putc(padc); + dtrace_debug__putc(cpu, padc); if (neg) - dtrace_debug__putc('-'); + dtrace_debug__putc(cpu, '-'); if (sharpflag && num != 0) { if (base == 8) { - dtrace_debug__putc('0'); + dtrace_debug__putc(cpu, '0'); } else if (base == 16) { - dtrace_debug__putc('0'); - dtrace_debug__putc('x'); + dtrace_debug__putc(cpu, '0'); + dtrace_debug__putc(cpu, 'x'); } } if (!ladjust && width && (width -= tmp) > 0) while (width--) - dtrace_debug__putc(padc); + dtrace_debug__putc(cpu, padc); while (*p) - dtrace_debug__putc(*p--); + dtrace_debug__putc(cpu, *p--); if (ladjust && width && (width -= tmp) > 0) while (width--) - dtrace_debug__putc(padc); + dtrace_debug__putc(cpu, padc); break; default: while (percent < fmt) - dtrace_debug__putc(*percent++); + dtrace_debug__putc(cpu, *percent++); /* * Since we ignore an formatting argument it is no * longer safe to obey the remaining formatting @@ -522,23 +534,26 @@ } } - dtrace_debug__putc('\0'); + dtrace_debug__putc(cpu, '\0'); } void dtrace_debug_printf(const char *fmt, ...) { va_list ap; + int cpu; - dtrace_debug_lock(curcpu); + cpu = curcpu; + dtrace_debug_lock(cpu); + va_start(ap, fmt); - dtrace_debug_vprintf(fmt, ap); + dtrace_debug_vprintf(cpu, fmt, ap); va_end(ap); - dtrace_debug_unlock(curcpu); + dtrace_debug_unlock(cpu); } #else _______________________________________________ freebsd-hackers@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-hackers To unsubscribe, send any mail to "freebsd-hackers-unsubscr...@freebsd.org"