On Fri, Jun 21, 2024 at 01:24:27PM +0200, Martin Pieuchot wrote: > So I'm trying to see where the remaining sched_yield() are coming from > ld(1): > > $ cd /sys/arch/arm64/compile/GENERIC.MP > $ LD="egdb --args ld" make -j32 > > Then I add a breakpoint on sched_yield & hit run. > > As soon as the first thread is stopped, I can see the trace as usual, > however the process is now in a "stopped" state, impossible to kill or > continue. Even ddb's kill command doesn't help.
So this is caused by multiple issues in ptrace, the single thread API and the sleep machinery. The fundamental issue is that ps_singlecnt gets out of sync and egdb hangs in single_thread_wait(). The issue at hand is that the way single_thread_check() handles suspends in the deep case is just not right. It will decrement ps_singlecnt, but if the proc was already in SSLEEP then that decrement was already done in single_thread_set(). Here is a possible solution to this problem: add yet another proc flag to track the time between sleep_setup and single_thread_check (via sleep_signal_check). Using this information, single_thread_set() can be adjusted to not decrement ps_singlecnt in that case. On top of this I had to fix ptrace to better respect gdb's request to progress a single thread or all threads. Without that I just constantly hit egdb asserts because of unexpected progress of unrelated threads. With this I can add the sched_yield breakpoint and continue through the full run. Does this break other stuff? Most probably :) there are a lot of bits that are not quite right when it comes to signals and multiple threads. 
-- :wq Claudio Index: kern/kern_sig.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sig.c,v diff -u -p -r1.333 kern_sig.c --- kern/kern_sig.c 22 Jul 2024 09:43:47 -0000 1.333 +++ kern/kern_sig.c 24 Jul 2024 14:14:06 -0000 @@ -851,7 +851,9 @@ trapsignal(struct proc *p, int signum, u SCHED_UNLOCK(); signum = pr->ps_xsig; - single_thread_clear(p, 0); + if ((p->p_flag & P_TRACESINGLE) == 0) + single_thread_clear(p, 0); + atomic_clearbits_int(&p->p_flag, P_TRACESINGLE); /* * If we are no longer being traced, or the parent @@ -1370,7 +1372,9 @@ cursig(struct proc *p, struct sigctx *sc atomic_clearbits_int(&pr->ps_siglist, mask); } - single_thread_clear(p, 0); + if ((p->p_flag & P_TRACESINGLE) == 0) + single_thread_clear(p, 0); + atomic_clearbits_int(&p->p_flag, P_TRACESINGLE); /* * If we are no longer being traced, or the parent @@ -2077,6 +2081,9 @@ single_thread_check_locked(struct proc * MUTEX_ASSERT_LOCKED(&pr->ps_mtx); + if (deep) + atomic_clearbits_int(&p->p_flag, P_SINGLESLEEP); + if (pr->ps_single == NULL || pr->ps_single == p) return (0); @@ -2177,16 +2184,23 @@ single_thread_set(struct proc *p, int fl if (mode == SINGLE_EXIT) { unsleep(q); setrunnable(q); - } else + } else { --pr->ps_singlecnt; + } break; case SSLEEP: /* if it's not interruptible, then just have to wait */ if (q->p_flag & P_SINTR) { /* merely need to suspend? just stop it */ if (mode == SINGLE_SUSPEND) { + /* + * if between sleep_setup and + * sleep_signal_check don't count us + * out. 
+ */ + if ((q->p_flag & P_SINGLESLEEP) == 0) + --pr->ps_singlecnt; q->p_stat = SSTOP; - --pr->ps_singlecnt; break; } /* need to unwind or exit, so wake it */ @@ -2263,6 +2277,8 @@ single_thread_clear(struct proc *p, int */ SCHED_LOCK(); if (q->p_stat == SSTOP && (q->p_flag & flag) == 0) { + if (flag == 0) + atomic_clearbits_int(&q->p_flag, P_SUSPSIG); if (q->p_wchan == NULL) setrunnable(q); else { Index: kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v diff -u -p -r1.206 kern_synch.c --- kern/kern_synch.c 23 Jul 2024 08:38:02 -0000 1.206 +++ kern/kern_synch.c 24 Jul 2024 14:14:06 -0000 @@ -356,7 +356,7 @@ sleep_setup(const volatile void *ident, atomic_setbits_int(&p->p_flag, P_WSLEEP); TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq); if (prio & PCATCH) - atomic_setbits_int(&p->p_flag, P_SINTR); + atomic_setbits_int(&p->p_flag, P_SINTR | P_SINGLESLEEP); p->p_stat = SSLEEP; SCHED_UNLOCK(); @@ -399,15 +399,18 @@ sleep_finish(int timo, int do_sleep) */ if (p->p_wchan == NULL) do_sleep = 0; + KASSERT((p->p_flag & P_SINGLESLEEP) == 0); atomic_clearbits_int(&p->p_flag, P_WSLEEP); + /* If requested to stop always force a stop even if do_sleep == 0 */ + if (p->p_stat == SSTOP) + do_sleep = 1; if (do_sleep) { KASSERT(p->p_stat == SSLEEP || p->p_stat == SSTOP); p->p_ru.ru_nvcsw++; mi_switch(); } else { - KASSERT(p->p_stat == SONPROC || p->p_stat == SSLEEP || - p->p_stat == SSTOP); + KASSERT(p->p_stat == SONPROC || p->p_stat == SSLEEP); unsleep(p); p->p_stat = SONPROC; } @@ -420,10 +423,7 @@ sleep_finish(int timo, int do_sleep) p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri; SCHED_UNLOCK(); - /* - * Even though this belongs to the signal handling part of sleep, - * we need to clear it before the ktrace. - */ + /* Must clear this before hitting another sleep point. 
*/ atomic_clearbits_int(&p->p_flag, P_SINTR); if (timo != 0) { Index: kern/sys_process.c =================================================================== RCS file: /cvs/src/sys/kern/sys_process.c,v diff -u -p -r1.98 sys_process.c --- kern/sys_process.c 3 Jun 2024 12:48:25 -0000 1.98 +++ kern/sys_process.c 24 Jul 2024 14:14:06 -0000 @@ -441,6 +441,8 @@ ptrace_ctrl(struct proc *p, int req, pid if (pid < THREAD_PID_OFFSET && tr->ps_single) t = tr->ps_single; + else + atomic_setbits_int(&t->p_flag, P_TRACESINGLE); /* If the address parameter is not (int *)1, set the pc. */ if ((int *)addr != (int *)1) Index: sys/proc.h =================================================================== RCS file: /cvs/src/sys/sys/proc.h,v diff -u -p -r1.365 proc.h --- sys/proc.h 22 Jul 2024 09:43:47 -0000 1.365 +++ sys/proc.h 24 Jul 2024 14:14:06 -0000 @@ -430,9 +430,11 @@ struct proc { #define P_SIGSUSPEND 0x00000008 /* Need to restore before-suspend mask*/ #define P_CANTSLEEP 0x00000010 /* insomniac thread */ #define P_WSLEEP 0x00000020 /* Working on going to sleep. */ +#define P_SINGLESLEEP 0x00000040 /* Like P_WSLEEP for single thread api */ #define P_SINTR 0x00000080 /* Sleep is interruptible. */ #define P_SYSTEM 0x00000200 /* No sigs, stats or swapping. */ #define P_TIMEOUT 0x00000400 /* Timing out during sleep. */ +#define P_TRACESINGLE 0x00001000 /* keep single threaded ptraced. */ #define P_WEXIT 0x00002000 /* Working on exiting. */ #define P_OWEUPC 0x00008000 /* Owe proc an addupc() at next ast. */ #define P_SUSPSINGLE 0x00080000 /* Need to stop for single threading. 
*/ @@ -443,9 +445,9 @@ struct proc { #define P_BITS \ ("\20" "\01INKTR" "\02PROFPEND" "\03ALRMPEND" "\04SIGSUSPEND" \ - "\05CANTSLEEP" "\06WSLEEP" "\010SINTR" "\012SYSTEM" "\013TIMEOUT" \ - "\016WEXIT" "\020OWEUPC" "\024SUSPSINGLE" "\030CONTINUED" "\033THREAD" \ - "\034SUSPSIG" "\037CPUPEG") + "\05CANTSLEEP" "\06WSLEEP" "\07SINGLESLEEP" "\010SINTR" "\012SYSTEM" \ + "\013TIMEOUT" "\015TRACESINGLE" "\016WEXIT" "\020OWEUPC" "\024SUSPSINGLE" \ + "\030CONTINUED" "\033THREAD" "\034SUSPSIG" "\037CPUPEG") #define THREAD_PID_OFFSET 100000