Module Name: src Committed By: riastradh Date: Wed Aug 9 08:23:13 UTC 2023
Modified Files: src/sys/kern: subr_workqueue.c src/tests/rump/rumpkern: t_workqueue.c Log Message: workqueue(9): Avoid touching running work items in workqueue_wait. As soon as the workqueue function has called, it is forbidden to touch the struct work passed to it -- the function might free or reuse the data structure it is embedded in. So workqueue_wait is forbidden to search the queue for the batch of running work items. Instead, use a generation number which is odd while the thread is processing a batch of work and even when not. There's still a small optimization available with the struct work pointer to wait for: if we find the work item in one of the per-CPU _pending_ queues, then after we wait for a batch of work to complete on that CPU, we don't need to wait for work on any other CPUs. PR kern/57574 XXX pullup-10 XXX pullup-9 XXX pullup-8 To generate a diff of this commit: cvs rdiff -u -r1.41 -r1.42 src/sys/kern/subr_workqueue.c cvs rdiff -u -r1.3 -r1.4 src/tests/rump/rumpkern/t_workqueue.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/kern/subr_workqueue.c diff -u src/sys/kern/subr_workqueue.c:1.41 src/sys/kern/subr_workqueue.c:1.42 --- src/sys/kern/subr_workqueue.c:1.41 Sat Oct 29 11:41:00 2022 +++ src/sys/kern/subr_workqueue.c Wed Aug 9 08:23:13 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: subr_workqueue.c,v 1.41 2022/10/29 11:41:00 riastradh Exp $ */ +/* $NetBSD: subr_workqueue.c,v 1.42 2023/08/09 08:23:13 riastradh Exp $ */ /*- * Copyright (c)2002, 2005, 2006, 2007 YAMAMOTO Takashi, @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: subr_workqueue.c,v 1.41 2022/10/29 11:41:00 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_workqueue.c,v 1.42 2023/08/09 08:23:13 riastradh Exp $"); #include <sys/param.h> #include <sys/cpu.h> @@ -51,7 +51,7 @@ struct workqueue_queue { kmutex_t q_mutex; kcondvar_t q_cv; struct workqhead q_queue_pending; - struct workqhead q_queue_running; + uint64_t q_gen; lwp_t *q_worker; }; @@ -161,6 +161,8 @@ workqueue_worker(void *cookie) if (wq->wq_flags & WQ_FPU) s = kthread_fpu_enter(); for (;;) { + struct workqhead tmp; + /* * we violate abstraction of SIMPLEQ. */ @@ -168,18 +170,25 @@ workqueue_worker(void *cookie) mutex_enter(&q->q_mutex); while (SIMPLEQ_EMPTY(&q->q_queue_pending)) cv_wait(&q->q_cv, &q->q_mutex); - KASSERT(SIMPLEQ_EMPTY(&q->q_queue_running)); - q->q_queue_running.sqh_first = - q->q_queue_pending.sqh_first; /* XXX */ + tmp.sqh_first = q->q_queue_pending.sqh_first; /* XXX */ SIMPLEQ_INIT(&q->q_queue_pending); + + /* + * Mark the queue as actively running a batch of work + * by setting the generation number odd. + */ + q->q_gen |= 1; mutex_exit(&q->q_mutex); - workqueue_runlist(wq, &q->q_queue_running); + workqueue_runlist(wq, &tmp); + /* + * Notify workqueue_wait that we have completed a batch + * of work by incrementing the generation number. + */ mutex_enter(&q->q_mutex); - KASSERT(!SIMPLEQ_EMPTY(&q->q_queue_running)); - SIMPLEQ_INIT(&q->q_queue_running); - /* Wake up workqueue_wait */ + KASSERTMSG(q->q_gen & 1, "q=%p gen=%"PRIu64, q, q->q_gen); + q->q_gen++; cv_broadcast(&q->q_cv); mutex_exit(&q->q_mutex); } @@ -212,7 +221,7 @@ workqueue_initqueue(struct workqueue *wq mutex_init(&q->q_mutex, MUTEX_DEFAULT, ipl); cv_init(&q->q_cv, wq->wq_name); SIMPLEQ_INIT(&q->q_queue_pending); - SIMPLEQ_INIT(&q->q_queue_running); + q->q_gen = 0; ktf = ((wq->wq_flags & WQ_MPSAFE) != 0 ? KTHREAD_MPSAFE : 0); if (wq->wq_prio < PRI_KERNEL) ktf |= KTHREAD_TS; @@ -329,25 +338,48 @@ workqueue_q_wait(struct workqueue_queue { work_impl_t *wk; bool found = false; + uint64_t gen; mutex_enter(&q->q_mutex); + + /* + * Avoid a deadlock scenario. We can't guarantee that + * wk_target has completed at this point, but we can't wait for + * it either, so do nothing. + * + * XXX Are there use-cases that require this semantics? + */ if (q->q_worker == curlwp) goto out; + + /* + * Wait until the target is no longer pending. If we find it + * on this queue, the caller can stop looking in other queues. + * If we don't find it in this queue, however, we can't skip + * waiting -- it may be hidden in the running queue which we + * have no access to. + */ again: SIMPLEQ_FOREACH(wk, &q->q_queue_pending, wk_entry) { - if (wk == wk_target) - goto found; + if (wk == wk_target) { + found = true; + cv_wait(&q->q_cv, &q->q_mutex); + goto again; + } } - SIMPLEQ_FOREACH(wk, &q->q_queue_running, wk_entry) { - if (wk == wk_target) - goto found; - } - found: - if (wk != NULL) { - found = true; - cv_wait(&q->q_cv, &q->q_mutex); - goto again; + + /* + * The target may be in the batch of work currently running, + * but we can't touch that queue. So if there's anything + * running, wait until the generation changes. + */ + gen = q->q_gen; + if (gen & 1) { + do + cv_wait(&q->q_cv, &q->q_mutex); + while (gen == q->q_gen); } + out: mutex_exit(&q->q_mutex); Index: src/tests/rump/rumpkern/t_workqueue.c diff -u src/tests/rump/rumpkern/t_workqueue.c:1.3 src/tests/rump/rumpkern/t_workqueue.c:1.4 --- src/tests/rump/rumpkern/t_workqueue.c:1.3 Wed Aug 9 08:23:03 2023 +++ src/tests/rump/rumpkern/t_workqueue.c Wed Aug 9 08:23:13 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: t_workqueue.c,v 1.3 2023/08/09 08:23:03 riastradh Exp $ */ +/* $NetBSD: t_workqueue.c,v 1.4 2023/08/09 08:23:13 riastradh Exp $ */ /*- * Copyright (c) 2017 The NetBSD Foundation, Inc. @@ -88,7 +88,6 @@ ATF_TC_HEAD(workqueue_wait_pause, tc) ATF_TC_BODY(workqueue_wait_pause, tc) { - atf_tc_expect_fail("PR kern/57574"); REQUIRE_LIBC(signal(SIGSEGV, &sigsegv), SIG_ERR); rump_init();