The thread pool has a race condition if two elements complete before thread_pool_completion_bh() runs:
If element A's callback waits for element B using aio_poll(), it will deadlock because pool->completion_bh is not marked scheduled when the nested aio_poll() runs. Fix this by marking the BH scheduled while thread_pool_completion_bh() is executing. This way any nested aio_poll() loops will enter thread_pool_completion_bh() and complete the remaining elements. Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com> --- thread-pool.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/thread-pool.c b/thread-pool.c index 4cfd078..0ede168 100644 --- a/thread-pool.c +++ b/thread-pool.c @@ -65,6 +65,9 @@ struct ThreadPool { int max_threads; QEMUBH *new_thread_bh; + /* Atomic counter to detect completions while completion handler runs */ + uint32_t completion_token; + /* The following variables are only accessed from one AioContext. */ QLIST_HEAD(, ThreadPoolElement) head; @@ -118,6 +121,7 @@ static void *worker_thread(void *opaque) qemu_cond_broadcast(&pool->check_cancel); } + atomic_inc(&pool->completion_token); qemu_bh_schedule(pool->completion_bh); } @@ -167,9 +171,8 @@ static void spawn_thread(ThreadPool *pool) } } -static void thread_pool_completion_bh(void *opaque) +static void thread_pool_complete_elements(ThreadPool *pool) { - ThreadPool *pool = opaque; ThreadPoolElement *elem, *next; restart: @@ -196,6 +199,26 @@ restart: } } +static void thread_pool_completion_bh(void *opaque) +{ + ThreadPool *pool = opaque; + uint32_t token; + + do { + token = atomic_mb_read(&pool->completion_token); + + /* Stay scheduled in case elem->common.cb() makes a nested aio_poll() + * call. This avoids deadlock if element A's callback waits for + * element B and both completed at the same time. 
+ */ + qemu_bh_schedule(pool->completion_bh); + + thread_pool_complete_elements(pool); + + qemu_bh_cancel(pool->completion_bh); + } while (token != pool->completion_token); +} + static void thread_pool_cancel(BlockDriverAIOCB *acb) { ThreadPoolElement *elem = (ThreadPoolElement *)acb; -- 1.9.3