Il 11/07/2014 13:20, Stefan Hajnoczi ha scritto:
The thread pool has a race condition if two elements complete before
thread_pool_completion_bh() runs:
If element A's callback waits for element B using aio_poll() it will
deadlock since pool->completion_bh is not marked scheduled when the
nested aio_poll() runs.
Fix this by marking the BH scheduled while thread_pool_completion_bh()
is executing. This way any nested aio_poll() loops will enter
thread_pool_completion_bh() and complete the remaining elements.
Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com>
---
thread-pool.c | 27 +++++++++++++++++++++++++--
1 file changed, 25 insertions(+), 2 deletions(-)
diff --git a/thread-pool.c b/thread-pool.c
index 4cfd078..0ede168 100644
--- a/thread-pool.c
+++ b/thread-pool.c
@@ -65,6 +65,9 @@ struct ThreadPool {
int max_threads;
QEMUBH *new_thread_bh;
+ /* Atomic counter to detect completions while completion handler runs */
+ uint32_t completion_token;
+
/* The following variables are only accessed from one AioContext. */
QLIST_HEAD(, ThreadPoolElement) head;
@@ -118,6 +121,7 @@ static void *worker_thread(void *opaque)
qemu_cond_broadcast(&pool->check_cancel);
}
+ atomic_inc(&pool->completion_token);
qemu_bh_schedule(pool->completion_bh);
}
@@ -167,9 +171,8 @@ static void spawn_thread(ThreadPool *pool)
}
}
-static void thread_pool_completion_bh(void *opaque)
+static void thread_pool_complete_elements(ThreadPool *pool)
{
- ThreadPool *pool = opaque;
ThreadPoolElement *elem, *next;
restart:
@@ -196,6 +199,26 @@ restart:
}
}
+static void thread_pool_completion_bh(void *opaque)
+{
+ ThreadPool *pool = opaque;
+ uint32_t token;
+
+ do {
+ token = atomic_mb_read(&pool->completion_token);
+
+ /* Stay scheduled in case elem->common.cb() makes a nested aio_poll()
+ * call. This avoids deadlock if element A's callback waits for
+ * element B and both completed at the same time.
+ */
+ qemu_bh_schedule(pool->completion_bh);
+
+ thread_pool_complete_elements(pool);
+
+ qemu_bh_cancel(pool->completion_bh);
+ } while (token != pool->completion_token);
+}
+
static void thread_pool_cancel(BlockDriverAIOCB *acb)
{
ThreadPoolElement *elem = (ThreadPoolElement *)acb;
I am not sure I understand this patch.
The simplest way to fix deadlock is to change this in
thread_pool_completion_bh:
elem->common.cb(elem->common.opaque, elem->ret);
qemu_aio_release(elem);
goto restart;
to
/* In case elem->common.cb() makes a nested aio_poll() call,
* next may become invalid as well. Instead of just
* restarting the QLIST_FOREACH_SAFE, go through the BH
* once more, which also avoids deadlock if element A's
* callback waits for element B and both completed at the
* same time.
*/
qemu_bh_schedule(pool->completion_bh);
elem->common.cb(elem->common.opaque, elem->ret);
qemu_aio_release(elem);
There is no change in logic, it's just that the goto is switched to a BH
representing a continuation. I am then not sure why
pool->completion_token is necessary?
Perhaps it is just an optimization to avoid going multiple times around
aio_poll()?
Paolo