STINNER Victor <vstin...@python.org> added the comment:

When the test hangs, a background Python process starts to use 100% of my CPU.

This process is stuck in _thread.start_new_thread(). I opened the process in 
gdb.

(gdb) thread 1
[Switching to thread 1 (Thread 0x7fece33615c0 (LWP 132301))]
#0  __interceptor_pthread_create (thread=thread@entry=0x7ffe79cd6190, 
attr=attr@entry=0x7ffe79cd61b0, start_routine=start_routine@entry=0x6fed30 
<pythread_wrapper>, arg=arg@entry=0x60200019aed0)
    at ../../../../libsanitizer/asan/asan_interceptors.cpp:245
245         while (atomic_load(&param.is_registered, memory_order_acquire) == 0)
(gdb) where
#0  __interceptor_pthread_create (thread=thread@entry=0x7ffe79cd6190, 
attr=attr@entry=0x7ffe79cd61b0, start_routine=start_routine@entry=0x6fed30 
<pythread_wrapper>, arg=arg@entry=0x60200019aed0)
    at ../../../../libsanitizer/asan/asan_interceptors.cpp:245
#1  0x00000000006ff242 in PyThread_start_new_thread (func=func@entry=0x7f45d0 
<thread_run>, arg=arg@entry=0x604000e83c50) at Python/thread_pthread.h:287
(...)

(gdb) thread 2
[Switching to thread 2 (Thread 0x7fecd151c640 (LWP 132302))]
#0  __sanitizer::atomic_exchange<__sanitizer::atomic_uint32_t> 
(mo=__sanitizer::memory_order_acquire, v=2, a=0x640000001b00) at 
../../../../libsanitizer/sanitizer_common/sanitizer_atomic_clang.h:62
62      inline typename T::Type atomic_exchange(volatile T *a,
(gdb) where
#0  __sanitizer::atomic_exchange<__sanitizer::atomic_uint32_t> 
(mo=__sanitizer::memory_order_acquire, v=2, a=0x640000001b00) at 
../../../../libsanitizer/sanitizer_common/sanitizer_atomic_clang.h:62
#1  __sanitizer::BlockingMutex::Lock (this=this@entry=0x640000001b00) at 
../../../../libsanitizer/sanitizer_common/sanitizer_linux.cpp:649
#2  0x00007fece3914e15 in 
__sanitizer::GenericScopedLock<__sanitizer::BlockingMutex>::GenericScopedLock 
(mu=0x640000001b00, this=<synthetic pointer>) at 
../../../../libsanitizer/sanitizer_common/sanitizer_mutex.h:183
#3  
__sanitizer::SizeClassAllocator64<__asan::AP64<__sanitizer::LocalAddressSpaceView>
 >::GetFromAllocator (this=this@entry=0x7fece3a13f00 <__asan::instance>, 
stat=stat@entry=0x7feccfb46c40, class_id=class_id@entry=36, 
    chunks=chunks@entry=0x7feccfb42330, n_chunks=n_chunks@entry=8) at 
../../../../libsanitizer/sanitizer_common/sanitizer_allocator_primary64.h:146
#4  0x00007fece3914f41 in 
__sanitizer::SizeClassAllocator64LocalCache<__sanitizer::SizeClassAllocator64<__asan::AP64<__sanitizer::LocalAddressSpaceView>
 > >::Refill (this=this@entry=0x7feccfb390e0, c=c@entry=0x7feccfb42320, 
    allocator=allocator@entry=0x7fece3a13f00 <__asan::instance>, 
class_id=class_id@entry=36) at 
../../../../libsanitizer/sanitizer_common/sanitizer_allocator_local_cache.h:102
#5  0x00007fece3915209 in 
__sanitizer::SizeClassAllocator64LocalCache<__sanitizer::SizeClassAllocator64<__asan::AP64<__sanitizer::LocalAddressSpaceView>
 > >::Allocate (class_id=36, allocator=0x7fece3a13f00 <__asan::instance>, 
    this=0x7feccfb390e0) at 
../../../../libsanitizer/sanitizer_common/sanitizer_allocator_local_cache.h:38
#6  
__sanitizer::CombinedAllocator<__sanitizer::SizeClassAllocator64<__asan::AP64<__sanitizer::LocalAddressSpaceView>
 >, __sanitizer::LargeMmapAllocatorPtrArrayDynamic>::Allocate 
(this=this@entry=0x7fece3a13f00 <__asan::instance>, 
    cache=0x7feccfb390e0, size=<optimized out>, size@entry=8192, 
alignment=alignment@entry=1) at 
../../../../libsanitizer/sanitizer_common/sanitizer_allocator_combined.h:69
#7  0x00007fece39118f6 in __asan::QuarantineCallback::Allocate (size=8192, 
this=<synthetic pointer>) at 
../../../../libsanitizer/asan/asan_allocator.cpp:869
#8  __sanitizer::QuarantineCache<__asan::QuarantineCallback>::Enqueue (size=32, 
ptr=0x6030005477d0, cb=..., this=0x7feccfb39060) at 
../../../../libsanitizer/sanitizer_common/sanitizer_quarantine.h:223
#9  __sanitizer::Quarantine<__asan::QuarantineCallback, __asan::AsanChunk>::Put 
(size=32, ptr=0x6030005477d0, cb=..., c=0x7feccfb39060, this=<optimized out>) 
at ../../../../libsanitizer/sanitizer_common/sanitizer_quarantine.h:105
#10 __asan::Allocator::QuarantineChunk (this=this@entry=0x7fece3a13f00 
<__asan::instance>, m=m@entry=0x6030005477d0, stack=stack@entry=0x7fecd151b360, 
ptr=0x6030005477e0) at ../../../../libsanitizer/asan/asan_allocator.cpp:666
#11 0x00007fece3911ba5 in __asan::Allocator::Deallocate 
(alloc_type=__asan::FROM_MALLOC, stack=0x7fecd151b360, delete_alignment=0, 
delete_size=0, ptr=0x6030005477e0, this=0x7fece3a13f00 <__asan::instance>)
    at ../../../../libsanitizer/asan/asan_allocator.cpp:714
#12 0x00007fece3997618 in __interceptor_free (ptr=0x6030005477e0) at 
../../../../libsanitizer/asan/asan_malloc_linux.cpp:128
#13 0x00007fece362f7a4 in pthread_getattr_np@GLIBC_2.2.5 () from 
/lib64/libc.so.6
#14 0x00007fece39b142e in __sanitizer::GetThreadStackTopAndBottom 
(at_initialization=at_initialization@entry=false, 
stack_top=stack_top@entry=0x7fecd151bd10, 
stack_bottom=stack_bottom@entry=0x7fecd151bd18)
    at ../../../../libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp:143
#15 0x00007fece39b1778 in __sanitizer::GetThreadStackAndTls (main=<optimized 
out>, stk_addr=stk_addr@entry=0x7feccfb39020, 
stk_size=stk_size@entry=0x7fecd151bd68, tls_addr=tls_addr@entry=0x7feccfb39040, 
    tls_size=tls_size@entry=0x7fecd151bd60) at 
../../../../libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp:543
#16 0x00007fece39a3c93 in __asan::AsanThread::SetThreadStackAndTls 
(this=this@entry=0x7feccfb39000, options=<optimized out>) at 
../../../../libsanitizer/asan/asan_thread.h:81
#17 0x00007fece39a3fd6 in __asan::AsanThread::Init 
(this=this@entry=0x7feccfb39000, options=options@entry=0x0) at 
../../../../libsanitizer/asan/asan_thread.cpp:226
#18 0x00007fece39a446e in __asan::AsanThread::ThreadStart (this=0x7feccfb39000, 
os_id=132302, signal_thread_is_registered=0x7ffe79cd58f8) at 
../../../../libsanitizer/asan/asan_thread.cpp:258
#19 0x00007fece3781299 in start_thread () from /lib64/libpthread.so.0
#20 0x00007fece36a9353 in clone () from /lib64/libc.so.6


The thread 1 is stuck at the loop at the end of pthread_create(), line 245 an 
246: wait until &param.is_registered == 0.

210     INTERCEPTOR(int, pthread_create, void *thread,
211         void *attr, void *(*start_routine)(void*), void *arg) {
212       EnsureMainThreadIDIsCorrect();
213       // Strict init-order checking is thread-hostile.
214       if (flags()->strict_init_order)
215         StopInitOrderChecking();
216       GET_STACK_TRACE_THREAD;
217       int detached = 0;
218       if (attr)
219         REAL(pthread_attr_getdetachstate)(attr, &detached);
220       ThreadStartParam param;
221       atomic_store(&param.t, 0, memory_order_relaxed);
222       atomic_store(&param.is_registered, 0, memory_order_relaxed);
223       int result;
224       {
225         // Ignore all allocations made by pthread_create: thread stack/TLS 
may be
226         // stored by pthread for future reuse even after thread 
destruction, and
227         // the linked list it's stored in doesn't even hold valid pointers 
to the
228         // objects, the latter are calculated by obscure pointer arithmetic.
229     #if CAN_SANITIZE_LEAKS
230         __lsan::ScopedInterceptorDisabler disabler;
231     #endif
232         result = REAL(pthread_create)(thread, attr, asan_thread_start, 
&param);
233       }
234       if (result == 0) {
235         u32 current_tid = GetCurrentTidOrInvalid();
236         AsanThread *t =
237             AsanThread::Create(start_routine, arg, current_tid, &stack, 
detached);
238         atomic_store(&param.t, reinterpret_cast<uptr>(t), 
memory_order_release);
239         // Wait until the AsanThread object is initialized and the 
ThreadRegistry
240         // entry is in "started" state. One reason for this is that after 
this
241         // interceptor exits, the child thread's stack may be the only 
thing holding
242         // the |arg| pointer. This may cause LSan to report a leak if leak 
checking
243         // happens at a point when the interceptor has already exited, but 
the stack
244         // range for the child thread is not yet known.
245         while (atomic_load(&param.is_registered, memory_order_acquire) == 0)
246           internal_sched_yield();
247       }
248       return result;
249     }



The thread 2 seems to be stuck in a syscall done by 
__sanitizer::atomic_exchange<__sanitizer::atomic_uint32_t>, called by 
__sanitizer::BlockingMutex::Lock:

(gdb) disassemble 
Dump of assembler code for function _ZN11__sanitizer13BlockingMutex4LockEv:
   0x00007fece39af850 <+0>:     endbr64 
   0x00007fece39af854 <+4>:     push   r12
   0x00007fece39af856 <+6>:     push   rbp
   0x00007fece39af857 <+7>:     push   rbx
   0x00007fece39af858 <+8>:     mov    rcx,QWORD PTR [rdi+0x50]
   0x00007fece39af85c <+12>:    test   rcx,rcx
   0x00007fece39af85f <+15>:    jne    0x7fece39af8a8 
<_ZN11__sanitizer13BlockingMutex4LockEv+88>
   0x00007fece39af861 <+17>:    mov    eax,0x1
   0x00007fece39af866 <+22>:    xchg   DWORD PTR [rdi],eax
   0x00007fece39af868 <+24>:    test   eax,eax
   0x00007fece39af86a <+26>:    je     0x7fece39af89e 
<_ZN11__sanitizer13BlockingMutex4LockEv+78>
   0x00007fece39af86c <+28>:    mov    ebp,0x2
   0x00007fece39af871 <+33>:    mov    r12d,0xca
   0x00007fece39af877 <+39>:    mov    esi,0x80
   0x00007fece39af87c <+44>:    xor    ebx,ebx
   0x00007fece39af87e <+46>:    mov    edx,0x2
   0x00007fece39af883 <+51>:    jmp    0x7fece39af896 
<_ZN11__sanitizer13BlockingMutex4LockEv+70>
   0x00007fece39af885 <+53>:    nop    DWORD PTR [rax]
   0x00007fece39af888 <+56>:    mov    rax,r12
   0x00007fece39af88b <+59>:    mov    r10,rbx
   0x00007fece39af88e <+62>:    mov    r8,rbx
   0x00007fece39af891 <+65>:    mov    r9,rbx
   0x00007fece39af894 <+68>:    syscall 
=> 0x00007fece39af896 <+70>:    mov    eax,ebp
   0x00007fece39af898 <+72>:    xchg   DWORD PTR [rdi],eax
   0x00007fece39af89a <+74>:    test   eax,eax
   0x00007fece39af89c <+76>:    jne    0x7fece39af888 
<_ZN11__sanitizer13BlockingMutex4LockEv+56>
   0x00007fece39af89e <+78>:    pop    rbx
   0x00007fece39af89f <+79>:    pop    rbp
   0x00007fece39af8a0 <+80>:    pop    r12
   0x00007fece39af8a2 <+82>:    ret    
   0x00007fece39af8a3 <+83>:    nop    DWORD PTR [rax+rax*1+0x0]
   0x00007fece39af8a8 <+88>:    xor    r8d,r8d
   0x00007fece39af8ab <+91>:    lea    rdx,[rip+0x37d7c]        # 0x7fece39e762e
   0x00007fece39af8b2 <+98>:    mov    esi,0x285
   0x00007fece39af8b7 <+103>:   lea    rdi,[rip+0x37dfa]        # 0x7fece39e76b8
   0x00007fece39af8be <+110>:   call   0x7fece39be990 
<_ZN11__sanitizer11CheckFailedEPKciS1_yy>
End of assembler dump.

----------

_______________________________________
Python tracker <rep...@bugs.python.org>
<https://bugs.python.org/issue45200>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to