On Wed, Feb 19, 2025 at 07:12:23PM +0300, Vitalii Mordan wrote:
> TSAN reports a potential data race on the state field of
> ThreadPoolElement. This is fixed by using atomic access to the field.

The tsan output from the bug report:

WARNING: ThreadSanitizer: data race (pid=787043)
  Write of size 4 at 0x7b1c00000660 by thread T5 (mutexes: write M0):
    #0 worker_thread /home/mordan/qemu/build/../util/thread-pool.c:108:20 
(test-thread-pool-smc+0xa65a56)
    #1 qemu_thread_start 
/home/mordan/qemu/build/../util/qemu-thread-posix.c:543:9 
(test-thread-pool-smc+0xa49040)

  Previous read of size 4 at 0x7b1c00000660 by main thread:
    #0 thread_pool_completion_bh 
/home/mordan/qemu/build/../util/thread-pool.c:183:19 
(test-thread-pool-smc+0xa6549d)
    #1 aio_bh_call /home/mordan/qemu/build/../util/async.c:171:5 
(test-thread-pool-smc+0xa5e03e)
    #2 aio_bh_poll /home/mordan/qemu/build/../util/async.c:218:13 
(test-thread-pool-smc+0xa5e03e)
    #3 aio_poll /home/mordan/qemu/build/../util/aio-posix.c:722:17 
(test-thread-pool-smc+0xa4343a)
    #4 test_submit_many 
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:133:9 
(test-thread-pool-smc+0x50e638)
    #5 do_test_cancel 
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:150:5 
(test-thread-pool-smc+0x50e638)
    #6 test_cancel_async 
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:234:5 
(test-thread-pool-smc+0x50e638)
    #7 main /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:249:3 
(test-thread-pool-smc+0x50e638)

  Location is heap block of size 104 at 0x7b1c00000620 allocated by main thread:
    #0 malloc 
out/lib/clangrt-x86_64-unknown-linux-gnu/./out/lib/clangrt-x86_64-unknown-linux-gnu/./toolchain/llvm-project/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:667:5
 (test-thread-pool-smc+0x346131)
    #1 g_malloc <null> (libglib-2.0.so.0+0x5e738) (BuildId: 
e845b8fd2f396872c036976626389ffc4f50c9c5)
    #2 thread_pool_submit_aio 
/home/mordan/qemu/build/../util/thread-pool.c:251:11 
(test-thread-pool-smc+0xa648bd)
    #3 test_submit_many 
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:128:9 
(test-thread-pool-smc+0x50e600)
    #4 do_test_cancel 
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:150:5 
(test-thread-pool-smc+0x50e600)
    #5 test_cancel_async 
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:234:5 
(test-thread-pool-smc+0x50e600)
    #6 main /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:249:3 
(test-thread-pool-smc+0x50e600)

  Mutex M0 (0x7b3c00000100) created at:
    #0 pthread_mutex_init 
out/lib/clangrt-x86_64-unknown-linux-gnu/./out/lib/clangrt-x86_64-unknown-linux-gnu/./toolchain/llvm-project/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:1316:3
 (test-thread-pool-smc+0x34914f)
    #1 qemu_mutex_init 
/home/mordan/qemu/build/../util/qemu-thread-posix.c:71:11 
(test-thread-pool-smc+0xa47189)
    #2 thread_pool_init_one /home/mordan/qemu/build/../util/thread-pool.c:334:5 
(test-thread-pool-smc+0xa64f60)
    #3 thread_pool_new /home/mordan/qemu/build/../util/thread-pool.c:348:5 
(test-thread-pool-smc+0xa64f60)
    #4 aio_get_thread_pool /home/mordan/qemu/build/../util/async.c:441:28 
(test-thread-pool-smc+0xa5e6d4)
    #5 thread_pool_submit_aio 
/home/mordan/qemu/build/../util/thread-pool.c:246:24 
(test-thread-pool-smc+0xa6488d)
    #6 test_submit_many 
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:128:9 
(test-thread-pool-smc+0x50e600)
    #7 do_test_cancel 
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:150:5 
(test-thread-pool-smc+0x50e600)
    #8 test_cancel_async 
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:234:5 
(test-thread-pool-smc+0x50e600)
    #9 main /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:249:3 
(test-thread-pool-smc+0x50e600)

  Thread T5 'worker' (tid=787049, running) created by thread T4 at:
    #0 pthread_create 
out/lib/clangrt-x86_64-unknown-linux-gnu/./out/lib/clangrt-x86_64-unknown-linux-gnu/./toolchain/llvm-project/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:1022:3
 (test-thread-pool-smc+0x34791d)
    #1 qemu_thread_create 
/home/mordan/qemu/build/../util/qemu-thread-posix.c:583:11 
(test-thread-pool-smc+0xa48ed0)
    #2 do_spawn_thread /home/mordan/qemu/build/../util/thread-pool.c:146:5 
(test-thread-pool-smc+0xa658de)
    #3 worker_thread /home/mordan/qemu/build/../util/thread-pool.c:83:5 
(test-thread-pool-smc+0xa658de)
    #4 qemu_thread_start 
/home/mordan/qemu/build/../util/qemu-thread-posix.c:543:9 
(test-thread-pool-smc+0xa49040)

SUMMARY: ThreadSanitizer: data race 
/home/mordan/qemu/build/../util/thread-pool.c:108:20 in worker_thread

My interpretation is that TSan is reporting a data race between
the load in thread_pool_completion_bh():

  static void thread_pool_completion_bh(void *opaque)
  {
      ThreadPool *pool = opaque;
      ThreadPoolElement *elem, *next;

      defer_call_begin(); /* cb() may use defer_call() to coalesce work */

  restart:
      QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
          if (elem->state != THREAD_DONE) {
              ^^^^^^^^^^^

and the store in worker_thread():

    req = QTAILQ_FIRST(&pool->request_list);
    QTAILQ_REMOVE(&pool->request_list, req, reqs);
    req->state = THREAD_ACTIVE;
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^
    qemu_mutex_unlock(&pool->lock);

It doesn't matter whether thread_pool_completion_bh() sees THREAD_QUEUED
or THREAD_ACTIVE: both values fail the `elem->state != THREAD_DONE`
check, so the element is skipped either way. This looks like a false
positive with no practical effect.

THREAD_QUEUED vs THREAD_ACTIVE matters in thread_pool_cancel(), but that
is protected by pool->lock.

Paolo: Any thoughts?

Stefan

> 
> Fixes: d354c7eccf ("aio: add generic thread-pool facility")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2822
> Signed-off-by: Vitalii Mordan <mor...@ispras.ru>
> ---
>  util/thread-pool.c | 13 ++++++-------
>  1 file changed, 6 insertions(+), 7 deletions(-)
> 
> diff --git a/util/thread-pool.c b/util/thread-pool.c
> index 27eb777e85..6c5f4d085b 100644
> --- a/util/thread-pool.c
> +++ b/util/thread-pool.c
> @@ -111,9 +111,8 @@ static void *worker_thread(void *opaque)
>          ret = req->func(req->arg);
>  
>          req->ret = ret;
> -        /* Write ret before state.  */
> -        smp_wmb();
> -        req->state = THREAD_DONE;
> +        /* Atomically update state after setting ret.  */
> +        qatomic_store_release(&req->state, THREAD_DONE);
>  
>          qemu_bh_schedule(pool->completion_bh);
>          qemu_mutex_lock(&pool->lock);
> @@ -180,7 +179,7 @@ static void thread_pool_completion_bh(void *opaque)
>  
>  restart:
>      QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
> -        if (elem->state != THREAD_DONE) {
> +        if (qatomic_load_acquire(&elem->state) != THREAD_DONE) {
>              continue;
>          }
>  
> @@ -223,12 +222,12 @@ static void thread_pool_cancel(BlockAIOCB *acb)
>      trace_thread_pool_cancel(elem, elem->common.opaque);
>  
>      QEMU_LOCK_GUARD(&pool->lock);
> -    if (elem->state == THREAD_QUEUED) {
> +    if (qatomic_load_acquire(&elem->state) == THREAD_QUEUED) {
>          QTAILQ_REMOVE(&pool->request_list, elem, reqs);
>          qemu_bh_schedule(pool->completion_bh);
>  
> -        elem->state = THREAD_DONE;
>          elem->ret = -ECANCELED;
> +        qatomic_store_release(&elem->state, THREAD_DONE);
>      }
>  
>  }
> @@ -251,8 +250,8 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, 
> void *arg,
>      req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
>      req->func = func;
>      req->arg = arg;
> -    req->state = THREAD_QUEUED;
>      req->pool = pool;
> +    qatomic_store_release(&req->state, THREAD_QUEUED);
>  
>      QLIST_INSERT_HEAD(&pool->head, req, all);
>  
> -- 
> 2.34.1
> 

Attachment: signature.asc
Description: PGP signature

Reply via email to