On Fri, Jun 21, 2024 at 01:24:27PM +0200, Martin Pieuchot wrote:
> So I'm trying to see where the remaining sched_yield() are coming from
> ld(1):
> 
> $ cd /sys/arch/arm64/compile/GENERIC.MP
> $ LD="egdb --args ld" make -j32
> 
> Then I add a breakpoint on sched_yield & hit run.
> 
> As soon as the first thread is stopped, I can see the trace as usual,
> however the process is now in a "stopped" state, impossible to kill or
> continue.  Even ddb's kill command doesn't help.

So this is caused by multiple issues in ptrace, the single thread API
and the sleep machinery. The fundamental issue is that the ps_singlecnt
gets off and egdb hangs in single_thread_wait().

The issue at hand is that single_thread_check() for suspends while in deep
is just not right. It will decrement ps_singlecnt but if the proc was
already in SSLEEP then that decrement was already done in
single_thread_set().

Here is a possible solution to this problem by adding yet another proc
flag to track the time between sleep_setup and single_thread_check (via
sleep_signal_check). Using this information single_thread_set() can be
adjusted to no decrement ps_singlecnt in that case.

On top of this I had to fix ptrace to better respect gdb's request to
progress a single thread or all threads. Without that I just constantly
hit egdb asserts because of unexpected progress of unnrelated threads.

With this I can add the sched_yield break point and continue through the
full run. Does this break other stuff?
Most probably :) there is a lot of bits that are not quite right when it
comes to signals and multiple threads.

-- 
:wq Claudio

Index: kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
diff -u -p -r1.333 kern_sig.c
--- kern/kern_sig.c     22 Jul 2024 09:43:47 -0000      1.333
+++ kern/kern_sig.c     24 Jul 2024 14:14:06 -0000
@@ -851,7 +851,9 @@ trapsignal(struct proc *p, int signum, u
                        SCHED_UNLOCK();
 
                        signum = pr->ps_xsig;
-                       single_thread_clear(p, 0);
+                       if ((p->p_flag & P_TRACESINGLE) == 0)
+                               single_thread_clear(p, 0);
+                       atomic_clearbits_int(&p->p_flag, P_TRACESINGLE);
 
                        /*
                         * If we are no longer being traced, or the parent
@@ -1370,7 +1372,9 @@ cursig(struct proc *p, struct sigctx *sc
                                atomic_clearbits_int(&pr->ps_siglist, mask);
                        }
 
-                       single_thread_clear(p, 0);
+                       if ((p->p_flag & P_TRACESINGLE) == 0)
+                               single_thread_clear(p, 0);
+                       atomic_clearbits_int(&p->p_flag, P_TRACESINGLE);
 
                        /*
                         * If we are no longer being traced, or the parent
@@ -2077,6 +2081,9 @@ single_thread_check_locked(struct proc *
 
        MUTEX_ASSERT_LOCKED(&pr->ps_mtx);
 
+       if (deep)
+               atomic_clearbits_int(&p->p_flag, P_SINGLESLEEP);
+
        if (pr->ps_single == NULL || pr->ps_single == p)
                return (0);
 
@@ -2177,16 +2184,23 @@ single_thread_set(struct proc *p, int fl
                        if (mode == SINGLE_EXIT) {
                                unsleep(q);
                                setrunnable(q);
-                       } else
+                       } else {
                                --pr->ps_singlecnt;
+                       }
                        break;
                case SSLEEP:
                        /* if it's not interruptible, then just have to wait */
                        if (q->p_flag & P_SINTR) {
                                /* merely need to suspend?  just stop it */
                                if (mode == SINGLE_SUSPEND) {
+                                       /*
+                                        * if between sleep_setup and
+                                        * sleep_signal_check don't count us
+                                        * out.
+                                        */
+                                       if ((q->p_flag & P_SINGLESLEEP) == 0)
+                                               --pr->ps_singlecnt;
                                        q->p_stat = SSTOP;
-                                       --pr->ps_singlecnt;
                                        break;
                                }
                                /* need to unwind or exit, so wake it */
@@ -2263,6 +2277,8 @@ single_thread_clear(struct proc *p, int 
                 */
                SCHED_LOCK();
                if (q->p_stat == SSTOP && (q->p_flag & flag) == 0) {
+                       if (flag == 0)
+                               atomic_clearbits_int(&q->p_flag, P_SUSPSIG);
                        if (q->p_wchan == NULL)
                                setrunnable(q);
                        else {
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
diff -u -p -r1.206 kern_synch.c
--- kern/kern_synch.c   23 Jul 2024 08:38:02 -0000      1.206
+++ kern/kern_synch.c   24 Jul 2024 14:14:06 -0000
@@ -356,7 +356,7 @@ sleep_setup(const volatile void *ident, 
        atomic_setbits_int(&p->p_flag, P_WSLEEP);
        TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
        if (prio & PCATCH)
-               atomic_setbits_int(&p->p_flag, P_SINTR);
+               atomic_setbits_int(&p->p_flag, P_SINTR | P_SINGLESLEEP);
        p->p_stat = SSLEEP;
 
        SCHED_UNLOCK();
@@ -399,15 +399,18 @@ sleep_finish(int timo, int do_sleep)
         */
        if (p->p_wchan == NULL)
                do_sleep = 0;
+       KASSERT((p->p_flag & P_SINGLESLEEP) == 0);
        atomic_clearbits_int(&p->p_flag, P_WSLEEP);
 
+       /* If requested to stop always force a stop even if do_sleep == 0 */
+       if (p->p_stat == SSTOP)
+               do_sleep = 1;
        if (do_sleep) {
                KASSERT(p->p_stat == SSLEEP || p->p_stat == SSTOP);
                p->p_ru.ru_nvcsw++;
                mi_switch();
        } else {
-               KASSERT(p->p_stat == SONPROC || p->p_stat == SSLEEP ||
-                   p->p_stat == SSTOP);
+               KASSERT(p->p_stat == SONPROC || p->p_stat == SSLEEP);
                unsleep(p);
                p->p_stat = SONPROC;
        }
@@ -420,10 +423,7 @@ sleep_finish(int timo, int do_sleep)
        p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
        SCHED_UNLOCK();
 
-       /*
-        * Even though this belongs to the signal handling part of sleep,
-        * we need to clear it before the ktrace.
-        */
+       /* Must clear this before hitting another sleep point. */
        atomic_clearbits_int(&p->p_flag, P_SINTR);
 
        if (timo != 0) {
Index: kern/sys_process.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_process.c,v
diff -u -p -r1.98 sys_process.c
--- kern/sys_process.c  3 Jun 2024 12:48:25 -0000       1.98
+++ kern/sys_process.c  24 Jul 2024 14:14:06 -0000
@@ -441,6 +441,8 @@ ptrace_ctrl(struct proc *p, int req, pid
 
                if (pid < THREAD_PID_OFFSET && tr->ps_single)
                        t = tr->ps_single;
+               else
+                       atomic_setbits_int(&t->p_flag, P_TRACESINGLE);
 
                /* If the address parameter is not (int *)1, set the pc. */
                if ((int *)addr != (int *)1)
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
diff -u -p -r1.365 proc.h
--- sys/proc.h  22 Jul 2024 09:43:47 -0000      1.365
+++ sys/proc.h  24 Jul 2024 14:14:06 -0000
@@ -430,9 +430,11 @@ struct proc {
 #define        P_SIGSUSPEND    0x00000008      /* Need to restore 
before-suspend mask*/
 #define        P_CANTSLEEP     0x00000010      /* insomniac thread */
 #define        P_WSLEEP        0x00000020      /* Working on going to sleep. */
+#define        P_SINGLESLEEP   0x00000040      /* Like P_WSLEEP for single 
thread api */
 #define        P_SINTR         0x00000080      /* Sleep is interruptible. */
 #define        P_SYSTEM        0x00000200      /* No sigs, stats or swapping. 
*/
 #define        P_TIMEOUT       0x00000400      /* Timing out during sleep. */
+#define        P_TRACESINGLE   0x00001000      /* keep single threaded 
ptraced. */
 #define        P_WEXIT         0x00002000      /* Working on exiting. */
 #define        P_OWEUPC        0x00008000      /* Owe proc an addupc() at next 
ast. */
 #define        P_SUSPSINGLE    0x00080000      /* Need to stop for single 
threading. */
@@ -443,9 +445,9 @@ struct proc {
 
 #define        P_BITS \
     ("\20" "\01INKTR" "\02PROFPEND" "\03ALRMPEND" "\04SIGSUSPEND" \
-     "\05CANTSLEEP" "\06WSLEEP" "\010SINTR" "\012SYSTEM" "\013TIMEOUT" \
-     "\016WEXIT" "\020OWEUPC" "\024SUSPSINGLE" "\030CONTINUED" "\033THREAD" \
-     "\034SUSPSIG" "\037CPUPEG")
+     "\05CANTSLEEP" "\06WSLEEP" "\07SINGLESLEEP" "\010SINTR" "\012SYSTEM" \
+     "\013TIMEOUT" "\015TRACESINGLE" "\016WEXIT" "\020OWEUPC" "\024SUSPSINGLE" 
\
+     "\030CONTINUED" "\033THREAD" "\034SUSPSIG" "\037CPUPEG")
 
 #define        THREAD_PID_OFFSET       100000
 

Reply via email to