On Tue, 8 Nov 2005, Attila Nagy wrote:
Any chance to investigate it further? It's a really annoying bug, which makes quota support a little bit useless.
Attached is a patch that corrects at least one or two deadlock scenarios in UNIX domain socket garbage collecting (unpgc_task.diff). I've also attached a patch for kern_descriptor.c that fixes a bug there in the passing of file descriptors referencing files over UNIX domain sockets (closef_threadnull.diff). I've committed the latter to 7.x, with the intent to merge to 6.x in a week or so. The larger UNIX domain socket garbage collection patch I need to think about some more, but I'm quite interested in whether it fixes the deadlock you're seeing (or for that matter, makes it worse somehow).
Thanks, Robert N M Watson
Index: uipc_usrreq.c =================================================================== RCS file: /home/ncvs/src/sys/kern/uipc_usrreq.c,v retrieving revision 1.158 diff -u -r1.158 uipc_usrreq.c --- uipc_usrreq.c 31 Oct 2005 15:41:25 -0000 1.158 +++ uipc_usrreq.c 9 Nov 2005 21:49:53 -0000 @@ -59,6 +59,7 @@ #include <sys/sx.h> #include <sys/sysctl.h> #include <sys/systm.h> +#include <sys/taskqueue.h> #include <sys/un.h> #include <sys/unpcb.h> #include <sys/vnode.h> @@ -112,6 +113,13 @@ #define UNP_LOCK_ASSERT() mtx_assert(&unp_mtx, MA_OWNED) #define UNP_UNLOCK_ASSERT() mtx_assert(&unp_mtx, MA_NOTOWNED) +/* + * Garbage collection of cyclic file descriptor/socket references occurs + * asynchronously in a taskqueue context. See unp_gc() for a full + * description. + */ +static struct task unp_gc_task; + static int unp_attach(struct socket *); static void unp_detach(struct unpcb *); static int unp_bind(struct unpcb *,struct sockaddr *, struct thread *); @@ -120,7 +128,7 @@ static void unp_disconnect(struct unpcb *); static void unp_shutdown(struct unpcb *); static void unp_drop(struct unpcb *, int); -static void unp_gc(void); +static void unp_gc(__unused void *, int); static void unp_scan(struct mbuf *, void (*)(struct file *)); static void unp_mark(struct file *); static void unp_discard(struct file *); @@ -795,19 +803,9 @@ } soisdisconnected(unp->unp_socket); unp->unp_socket->so_pcb = NULL; - if (unp_rights) { - /* - * Normally the receive buffer is flushed later, - * in sofree, but if our receive buffer holds references - * to descriptors that are now garbage, we will dispose - * of those descriptor references after the garbage collector - * gets them (resulting in a "panic: closef: count < 0"). - */ - sorflush(unp->unp_socket); - unp_gc(); /* Will unlock UNP. */ - } else - UNP_UNLOCK(); - UNP_UNLOCK_ASSERT(); + if (unp_rights) + taskqueue_enqueue(taskqueue_thread, &unp_gc_task); + UNP_UNLOCK(); if (unp->unp_addr != NULL) FREE(unp->unp_addr, M_SONAME); uma_zfree(unp_zone, unp); @@ -1395,7 +1393,7 @@ uma_zone_set_max(unp_zone, nmbclusters); LIST_INIT(&unp_dhead); LIST_INIT(&unp_shead); - + TASK_INIT(&unp_gc_task, 0, unp_gc, NULL); UNP_LOCK_INIT(); } @@ -1581,14 +1579,20 @@ } /* - * unp_defer is thread-local during garbage collection, and does not require - * explicit synchronization. unp_gcing prevents other threads from entering - * garbage collection, and perhaps should be an sx lock instead. + * unp_defer indicates whether additional work has been defered for a future + * pass through unp_gc(). It is thread local and does not require explicit + * synchronization. */ -static int unp_defer, unp_gcing; +static int unp_defer; + +static int unp_taskcount; +SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, ""); + +static int unp_recycled; +SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, ""); static void -unp_gc(void) +unp_gc(__unused void *arg, int pending) { struct file *fp, *nextfp; struct socket *so; @@ -1597,15 +1601,8 @@ int nfiles_snap; int nfiles_slack = 20; - UNP_LOCK_ASSERT(); - - if (unp_gcing) { - UNP_UNLOCK(); - return; - } - unp_gcing = 1; + unp_taskcount++; unp_defer = 0; - UNP_UNLOCK(); /* * before going through all this, set all FDs to * be NOT defered and NOT externally accessible @@ -1700,6 +1697,9 @@ } while (unp_defer); sx_sunlock(&filelist_lock); /* + * XXXRW: The following comments need updating for a post-SMPng and + * deferred unp_gc() world, but are still generally accurate. + * * We grab an extra reference to each of the file table entries * that are not otherwise accessible and then free the rights * that are stored in messages on them. @@ -1788,12 +1788,11 @@ FILE_UNLOCK(tfp); } } - for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) + for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { closef(*fpp, (struct thread *) NULL); + unp_recycled++; + } free(extra_ref, M_TEMP); - unp_gcing = 0; - - UNP_UNLOCK_ASSERT(); } void
Index: kern_descrip.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_descrip.c,v retrieving revision 1.283 retrieving revision 1.284 diff -u -r1.283 -r1.284 --- kern_descrip.c 1 Nov 2005 17:13:05 -0000 1.283 +++ kern_descrip.c 9 Nov 2005 20:54:25 -0000 1.284 @@ -1880,9 +1880,13 @@ * a flag in the unlock to free ONLY locks obeying POSIX * semantics, and not to free BSD-style file locks. * If the descriptor was in a message, POSIX-style locks - * aren't passed with the descriptor. + * aren't passed with the descripto, and the thread pointer + * will be NULL. Callers should be careful only to pass a + * NULL thread pointer when there really is no owning + * context that might have locks, or the locks will be + * leaked. */ - if (fp->f_type == DTYPE_VNODE) { + if (fp->f_type == DTYPE_VNODE && td != NULL) { int vfslocked; vp = fp->f_vnode;
_______________________________________________ freebsd-hackers@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-hackers To unsubscribe, send any mail to "[EMAIL PROTECTED]"