Now that siglock keeps tsk->parent and tsk->real_parent constant,
require that do_notify_parent_cldstop be called with tsk->siglock held
instead of tasklist_lock.

As all of the callers of do_notify_parent_cldstop previously had to
drop siglock and take tasklist_lock, this simplifies all of its
callers.

This removes one reason for taking tasklist_lock.
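
The heavy lifting is done by the new lock_parents_siglocks helper
below, which sorts the three siglocks by address and takes them in
ascending order.  For illustration only, here is a rough userspace
analogue of that ordering trick (a pthread sketch, not part of the
patch; all names in it are made up):

#include <pthread.h>
#include <stdio.h>

static void swap_ptr(pthread_mutex_t **a, pthread_mutex_t **b)
{
	pthread_mutex_t *tmp = *a;
	*a = *b;
	*b = tmp;
}

/* Take three unordered locks safely: sort by address, lock ascending. */
static void lock_three(pthread_mutex_t *s1, pthread_mutex_t *s2,
		       pthread_mutex_t *s3)
{
	/* Sort the pointers so that s1 <= s2 <= s3 */
	if (s1 > s2)
		swap_ptr(&s1, &s2);
	if (s1 > s3)
		swap_ptr(&s1, &s3);
	if (s2 > s3)
		swap_ptr(&s2, &s3);

	/* Every caller now uses the same global order: no ABBA deadlock */
	pthread_mutex_lock(s1);
	if (s2 != s1)
		pthread_mutex_lock(s2);
	if (s3 != s2)
		pthread_mutex_lock(s3);
}

int main(void)
{
	pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t c = PTHREAD_MUTEX_INITIALIZER;

	lock_three(&a, &b, &c);
	printf("all three locks held\n");
	return 0;
}

The kernel helper additionally revalidates real_parent and parent
under the locks and retries, because both can change while current's
siglock is dropped.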

This makes ptrace_stop work correctly and reliably with PREEMPT_RT
enabled and CONFIG_CGROUPS disabled.  The remaining challenge is that
cgroup_enter_frozen takes a spin_lock after __state has been set to
TASK_TRACED, which on PREEMPT_RT means the code can sleep and change
__state.  Not only that, but it means that wait_task_inactive could
detect the task scheduling away at that point and fail, causing
ptrace_check_attach to fail.
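
Condensed from the patched ptrace_stop below (and assuming
cgroup_enter_frozen still takes a spinlock such as css_set_lock
internally, which becomes a sleeping lock on PREEMPT_RT), the
problematic window is:

	set_special_state(TASK_TRACED);		/* publish __state */
	...
	preempt_disable();
	spin_unlock_irq(&current->sighand->siglock);
	cgroup_enter_frozen();			/* may sleep on PREEMPT_RT and
						 * briefly change __state, which
						 * wait_task_inactive can observe
						 * and then report failure */
	preempt_enable_no_resched();
	freezable_schedule();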

Signed-off-by: "Eric W. Biederman" <ebied...@xmission.com>
---
 kernel/signal.c | 262 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 189 insertions(+), 73 deletions(-)

diff --git a/kernel/signal.c b/kernel/signal.c
index 2cc45e8448e2..d4956be51939 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1994,6 +1994,129 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
        return ret;
 }
 
+/**
+ * lock_parents_siglocks - Take current's, real_parent's, and parent's siglocks
+ * @lock_tracer: The tracer's siglock is needed.
+ *
+ * There is no natural ordering to these locks so they must be sorted
+ * before being taken.
+ *
+ * There are two complicating factors here:
+ * - The locks live in sighand and sighand can be arbitrarily shared
+ * - parent and real_parent can change when current's siglock is unlocked.
+ *
+ * To deal with this, first all of the sighand pointers are
+ * gathered under current's siglock, and the sighand pointers are
+ * sorted.  As siglock lives inside of sighand this also sorts the
+ * siglocks by address.
+ *
+ * Then the siglocks are taken in order dropping current's siglock if
+ * necessary.
+ *
+ * Finally, if parent and real_parent have not changed, return.
+ * If either of them has changed, drop their locks and try again.
+ *
+ * Changing sighand is an infrequent and somewhat expensive operation
+ * (unshare or exec) and so even in the worst case this loop
+ * should not loop too many times before all of the proper locks are
+ * taken in order.
+ *
+ * CONTEXT:
+ * Must be called with @current->sighand->siglock held
+ *
+ * RETURNS:
+ * current's, real_parent's, and parent's siglocks held.
+ */
+static void lock_parents_siglocks(bool lock_tracer)
+       __releases(&current->sighand->siglock)
+       __acquires(&current->sighand->siglock)
+       __acquires(&current->real_parent->sighand->siglock)
+       __acquires(&current->parent->sighand->siglock)
+{
+       struct task_struct *me = current;
+       struct sighand_struct *m_sighand = me->sighand;
+
+       lockdep_assert_held(&m_sighand->siglock);
+
+       rcu_read_lock();
+       for (;;) {
+               struct task_struct *parent, *tracer;
+               struct sighand_struct *p_sighand, *t_sighand, *s1, *s2, *s3;
+
+               parent = me->real_parent;
+               tracer = ptrace_parent(me);
+               if (!tracer || !lock_tracer)
+                       tracer = parent;
+
+               p_sighand = rcu_dereference(parent->sighand);
+               t_sighand = rcu_dereference(tracer->sighand);
+
+               /* Sort the sighands so that s1 <= s2 <= s3 */
+               s1 = m_sighand;
+               s2 = p_sighand;
+               s3 = t_sighand;
+               if (s1 > s2)
+                       swap(s1, s2);
+               if (s1 > s3)
+                       swap(s1, s3);
+               if (s2 > s3)
+                       swap(s2, s3);
+
+               /* Take the locks in order */
+               if (s1 != m_sighand) {
+                       spin_unlock(&m_sighand->siglock);
+                       spin_lock(&s1->siglock);
+               }
+               if (s1 != s2)
+                       spin_lock_nested(&s2->siglock, 1);
+               if (s2 != s3)
+                       spin_lock_nested(&s3->siglock, 2);
+
+               /* Verify the proper locks are held */
+               if (likely((s1 == m_sighand) ||
+                          ((me->real_parent == parent) &&
+                           (me->parent == tracer) &&
+                           (parent->sighand == p_sighand) &&
+                           (tracer->sighand == t_sighand)))) {
+                       break;
+               }
+
+               /* Drop all but current's siglock */
+               if (p_sighand != m_sighand)
+                       spin_unlock(&p_sighand->siglock);
+               if (t_sighand != p_sighand)
+                       spin_unlock(&t_sighand->siglock);
+
+               /*
+                * Since [pt]_sighand will likely change if we go
+                * around, and m_sighand is the only one held, make sure
+                * it is subclass-0, since the above 's1 != m_sighand'
+                * clause very much relies on that.
+                */
+               lock_set_subclass(&m_sighand->siglock.dep_map, 0, _RET_IP_);
+       }
+       rcu_read_unlock();
+}
+
+static void unlock_parents_siglocks(bool unlock_tracer)
+       __releases(&current->real_parent->sighand->siglock)
+       __releases(&current->parent->sighand->siglock)
+{
+       struct task_struct *me = current;
+       struct task_struct *parent = me->real_parent;
+       struct task_struct *tracer = ptrace_parent(me);
+       struct sighand_struct *m_sighand = me->sighand;
+       struct sighand_struct *p_sighand = parent->sighand;
+
+       if (p_sighand != m_sighand)
+               spin_unlock(&p_sighand->siglock);
+       if (tracer && unlock_tracer) {
+               struct sighand_struct *t_sighand = tracer->sighand;
+               if (t_sighand != p_sighand)
+                       spin_unlock(&t_sighand->siglock);
+       }
+}
+
 static void do_notify_pidfd(struct task_struct *task)
 {
        struct pid *pid;
@@ -2125,11 +2248,12 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
                                     bool for_ptracer, int why)
 {
        struct kernel_siginfo info;
-       unsigned long flags;
        struct task_struct *parent;
        struct sighand_struct *sighand;
        u64 utime, stime;
 
+       lockdep_assert_held(&tsk->sighand->siglock);
+
        if (for_ptracer) {
                parent = tsk->parent;
        } else {
@@ -2137,6 +2261,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
                parent = tsk->real_parent;
        }
 
+       lockdep_assert_held(&parent->sighand->siglock);
+
        clear_siginfo(&info);
        info.si_signo = SIGCHLD;
        info.si_errno = 0;
@@ -2168,7 +2294,6 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
        }
 
        sighand = parent->sighand;
-       spin_lock_irqsave(&sighand->siglock, flags);
        if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN &&
            !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
                send_signal_locked(SIGCHLD, &info, parent, PIDTYPE_TGID);
@@ -2176,7 +2301,6 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
         * Even if SIGCHLD is not generated, we must wake up wait4 calls.
         */
        __wake_up_parent(tsk, parent);
-       spin_unlock_irqrestore(&sighand->siglock, flags);
 }
 
 /*
@@ -2208,14 +2332,18 @@ static void ptrace_stop(int exit_code, int why, unsigned long message,
                spin_lock_irq(&current->sighand->siglock);
        }
 
+       lock_parents_siglocks(true);
        /*
         * After this point ptrace_signal_wake_up or signal_wake_up
         * will clear TASK_TRACED if ptrace_unlink happens or a fatal
         * signal comes in.  Handle previous ptrace_unlinks and fatal
         * signals here to prevent ptrace_stop sleeping in schedule.
         */
-       if (!current->ptrace || __fatal_signal_pending(current))
+
+       if (!current->ptrace || __fatal_signal_pending(current)) {
+               unlock_parents_siglocks(true);
                return;
+       }
 
        set_special_state(TASK_TRACED);
        current->jobctl |= JOBCTL_TRACED;
@@ -2254,16 +2382,6 @@ static void ptrace_stop(int exit_code, int why, unsigned long message,
        if (why == CLD_STOPPED && (current->jobctl & JOBCTL_STOP_PENDING))
                gstop_done = task_participate_group_stop(current);
 
-       /* any trap clears pending STOP trap, STOP trap clears NOTIFY */
-       task_clear_jobctl_pending(current, JOBCTL_TRAP_STOP);
-       if (info && info->si_code >> 8 == PTRACE_EVENT_STOP)
-               task_clear_jobctl_pending(current, JOBCTL_TRAP_NOTIFY);
-
-       /* entering a trap, clear TRAPPING */
-       task_clear_jobctl_trapping(current);
-
-       spin_unlock_irq(&current->sighand->siglock);
-       read_lock(&tasklist_lock);
        /*
         * Notify parents of the stop.
         *
@@ -2279,14 +2397,25 @@ static void ptrace_stop(int exit_code, int why, unsigned long message,
        if (gstop_done && (!current->ptrace || ptrace_reparented(current)))
                do_notify_parent_cldstop(current, false, why);
 
+       unlock_parents_siglocks(true);
+
+       /* any trap clears pending STOP trap, STOP trap clears NOTIFY */
+       task_clear_jobctl_pending(current, JOBCTL_TRAP_STOP);
+       if (info && info->si_code >> 8 == PTRACE_EVENT_STOP)
+               task_clear_jobctl_pending(current, JOBCTL_TRAP_NOTIFY);
+
+       /* entering a trap, clear TRAPPING */
+       task_clear_jobctl_trapping(current);
+
        /*
         * Don't want to allow preemption here, because
         * sys_ptrace() needs this task to be inactive.
         *
-        * XXX: implement read_unlock_no_resched().
+        * XXX: implement spin_unlock_no_resched().
         */
        preempt_disable();
-       read_unlock(&tasklist_lock);
+       spin_unlock_irq(&current->sighand->siglock);
+
        cgroup_enter_frozen();
        preempt_enable_no_resched();
        freezable_schedule();
@@ -2361,8 +2490,8 @@ int ptrace_notify(int exit_code, unsigned long message)
  * on %true return.
  *
  * RETURNS:
- * %false if group stop is already cancelled or ptrace trap is scheduled.
- * %true if participated in group stop.
+ * %false if group stop is already cancelled.
+ * %true otherwise (as lock_parents_siglocks may have dropped siglock).
  */
 static bool do_signal_stop(int signr)
        __releases(&current->sighand->siglock)
@@ -2425,36 +2554,24 @@ static bool do_signal_stop(int signr)
                }
        }
 
+       lock_parents_siglocks(false);
+       /* Recheck JOBCTL_STOP_PENDING after unlock+lock of siglock */
+       if (unlikely(!(current->jobctl & JOBCTL_STOP_PENDING)))
+               goto out;
        if (likely(!current->ptrace)) {
-               int notify = 0;
-
                /*
                 * If there are no other threads in the group, or if there
                 * is a group stop in progress and we are the last to stop,
-                * report to the parent.
+                * report to the real_parent.
                 */
                if (task_participate_group_stop(current))
-                       notify = CLD_STOPPED;
+                       do_notify_parent_cldstop(current, false, CLD_STOPPED);
+               unlock_parents_siglocks(false);
 
                current->jobctl |= JOBCTL_STOPPED;
                set_special_state(TASK_STOPPED);
                spin_unlock_irq(&current->sighand->siglock);
 
-               /*
-                * Notify the parent of the group stop completion.  Because
-                * we're not holding either the siglock or tasklist_lock
-                * here, ptracer may attach inbetween; however, this is for
-                * group stop and should always be delivered to the real
-                * parent of the group leader.  The new ptracer will get
-                * its notification when this task transitions into
-                * TASK_TRACED.
-                */
-               if (notify) {
-                       read_lock(&tasklist_lock);
-                       do_notify_parent_cldstop(current, false, notify);
-                       read_unlock(&tasklist_lock);
-               }
-
                /* Now we don't run again until woken by SIGCONT or SIGKILL */
                cgroup_enter_frozen();
                freezable_schedule();
@@ -2465,8 +2582,11 @@ static bool do_signal_stop(int signr)
                 * Schedule it and let the caller deal with it.
                 */
                task_set_jobctl_pending(current, JOBCTL_TRAP_STOP);
-               return false;
        }
+out:
+       unlock_parents_siglocks(false);
+       spin_unlock_irq(&current->sighand->siglock);
+       return true;
 }
 
 /**
@@ -2624,32 +2744,30 @@ bool get_signal(struct ksignal *ksig)
        if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
                int why;
 
-               if (signal->flags & SIGNAL_CLD_CONTINUED)
-                       why = CLD_CONTINUED;
-               else
-                       why = CLD_STOPPED;
+               lock_parents_siglocks(true);
+               /* Recheck signal->flags after unlock+lock of siglock */
+               if (likely(signal->flags & SIGNAL_CLD_MASK)) {
+                       if (signal->flags & SIGNAL_CLD_CONTINUED)
+                               why = CLD_CONTINUED;
+                       else
+                               why = CLD_STOPPED;
 
-               signal->flags &= ~SIGNAL_CLD_MASK;
+                       signal->flags &= ~SIGNAL_CLD_MASK;
 
-               spin_unlock_irq(&sighand->siglock);
-
-               /*
-                * Notify the parent that we're continuing.  This event is
-                * always per-process and doesn't make whole lot of sense
-                * for ptracers, who shouldn't consume the state via
-                * wait(2) either, but, for backward compatibility, notify
-                * the ptracer of the group leader too unless it's gonna be
-                * a duplicate.
-                */
-               read_lock(&tasklist_lock);
-               do_notify_parent_cldstop(current, false, why);
-
-               if (ptrace_reparented(current->group_leader))
-                       do_notify_parent_cldstop(current->group_leader,
-                                               true, why);
-               read_unlock(&tasklist_lock);
-
-               goto relock;
+                       /*
+                        * Notify the parent that we're continuing.  This event is
+                        * always per-process and doesn't make whole lot of sense
+                        * for ptracers, who shouldn't consume the state via
+                        * wait(2) either, but, for backward compatibility, notify
+                        * the ptracer of the group leader too unless it's gonna be
+                        * a duplicate.
+                        */
+                       do_notify_parent_cldstop(current, false, why);
+                       if (ptrace_reparented(current->group_leader))
+                               do_notify_parent_cldstop(current->group_leader,
+                                                        true, why);
+               }
+               unlock_parents_siglocks(true);
        }
 
        for (;;) {
@@ -2906,7 +3024,6 @@ static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which)
 
 void exit_signals(struct task_struct *tsk)
 {
-       int group_stop = 0;
        sigset_t unblocked;
 
        /*
@@ -2937,21 +3054,20 @@ void exit_signals(struct task_struct *tsk)
        signotset(&unblocked);
        retarget_shared_pending(tsk, &unblocked);
 
-       if (unlikely(tsk->jobctl & JOBCTL_STOP_PENDING) &&
-           task_participate_group_stop(tsk))
-               group_stop = CLD_STOPPED;
-out:
-       spin_unlock_irq(&tsk->sighand->siglock);
-
        /*
         * If group stop has completed, deliver the notification.  This
         * should always go to the real parent of the group leader.
         */
-       if (unlikely(group_stop)) {
-               read_lock(&tasklist_lock);
-               do_notify_parent_cldstop(tsk, false, group_stop);
-               read_unlock(&tasklist_lock);
+       if (unlikely(tsk->jobctl & JOBCTL_STOP_PENDING)) {
+               lock_parents_siglocks(false);
+               /* Recheck JOBCTL_STOP_PENDING after unlock+lock of siglock */
+               if ((tsk->jobctl & JOBCTL_STOP_PENDING) &&
+                   task_participate_group_stop(tsk))
+                       do_notify_parent_cldstop(tsk, false, CLD_STOPPED);
+               unlock_parents_siglocks(false);
        }
+out:
+       spin_unlock_irq(&tsk->sighand->siglock);
 }
 
 /*
-- 
2.35.3

