On Mon, Dec 10, 2018 at 06:43:51PM +0100, Thomas Gleixner wrote:
> On Mon, 10 Dec 2018, Peter Zijlstra wrote:
> > On Mon, Dec 10, 2018 at 04:23:06PM +0100, Thomas Gleixner wrote:
> > There is another callers of futex_lock_pi_atomic(),
> > futex_proxy_trylock_atomic(), which is part of futex_requeue(), that too
> > does a retry loop on -EAGAIN.
> > 
> > And there is another caller of attach_to_pi_owner(): lookup_pi_state(),
> > and that too is in futex_requeue() and handles the retry case properly.
> > 
> > Yes, this all looks good.
> > 
> > Acked-by: Peter Zijlstra (Intel) <[email protected]>
> 
> Bah. The little devil in the unconcious part of my brain insisted on
> thinking further about that EAGAIN loop even despite my attempt to page
> that futex horrors out again immediately after sending that patch.
> 
> There is another related issue which is even worse than just mildly
> confusing user space:
> 
>    task1(SCHED_OTHER)
>    sys_exit()
>      do_exit()
>       exit_mm()
>        task1->flags |= PF_EXITING;
> 
>    ---> preemption
> 
>    task2(SCHED_FIFO)
>      sys_futex(LOCK_PI)
>        ....
>        attach_to_pi_owner() {
>          ...
>          if (!task1->flags & PF_EXITING) {
>            attach();
>          } else {
>               if (!(tsk->flags & PF_EXITPIDONE))
>                return -EAGAIN;
> 
> Now assume UP or both tasks pinned on the same CPU. That results in a
> livelock because task2 is going to loop forever.
> 
> No immediate idea how to cure that one w/o creating a mess.

One possible; but fairly gruesome hack; would be something like the
below.

Now, this obviously introduces a priority inversion, but that's
arguablly better than a live-lock, also I'm not sure there's really
anything 'sane' you can do in the case where your lock holder is dying
instead of doing a proper unlock anyway.

But no, I'm not liking this much either...

diff --git a/kernel/exit.c b/kernel/exit.c
index 0e21e6d21f35..bc6a01112d9d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -806,6 +806,8 @@ void __noreturn do_exit(long code)
                 * task into the wait for ever nirwana as well.
                 */
                tsk->flags |= PF_EXITPIDONE;
+               smp_mb();
+               wake_up_bit(&tsk->flags, 3 /* PF_EXITPIDONE */);
                set_current_state(TASK_UNINTERRUPTIBLE);
                schedule();
        }
diff --git a/kernel/futex.c b/kernel/futex.c
index f423f9b6577e..a743d657e783 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1148,8 +1148,8 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
  * Lookup the task for the TID provided from user space and attach to
  * it after doing proper sanity checks.
  */
-static int attach_to_pi_owner(u32 uval, union futex_key *key,
-                             struct futex_pi_state **ps)
+static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key 
*key,
+                             struct futex_pi_state **ps, struct task_struct 
**pe)
 {
        pid_t pid = uval & FUTEX_TID_MASK;
        struct futex_pi_state *pi_state;
@@ -1187,10 +1236,15 @@ static int attach_to_pi_owner(u32 uval, union futex_key 
*key,
                 * set, we know that the task has finished the
                 * cleanup:
                 */
                int ret = handle_exit_race(uaddr, uval, p);
 
                raw_spin_unlock_irq(&p->pi_lock);
-               put_task_struct(p);
+
+               if (ret == -EAGAIN)
+                       *pe = p;
+               else
+                       put_task_struct(p);
+
                return ret;
        }
 
@@ -1244,7 +1298,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
         * We are the first waiter - try to look up the owner based on
         * @uval and attach to it.
         */
-       return attach_to_pi_owner(uval, key, ps);
+       return attach_to_pi_owner(uaddr, uval, key, ps);
 }
 
 static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
@@ -1282,7 +1336,8 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 
uval, u32 newval)
 static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket 
*hb,
                                union futex_key *key,
                                struct futex_pi_state **ps,
-                               struct task_struct *task, int set_waiters)
+                               struct task_struct *task, int set_waiters,
+                               struct task_struct **exiting)
 {
        u32 uval, newval, vpid = task_pid_vnr(task);
        struct futex_q *top_waiter;
@@ -1352,7 +1407,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct 
futex_hash_bucket *hb,
         * attach to the owner. If that fails, no harm done, we only
         * set the FUTEX_WAITERS bit in the user space variable.
         */
-       return attach_to_pi_owner(uval, key, ps);
+       return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
 }
 
 /**
@@ -2716,6 +2771,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int 
flags,
        struct rt_mutex_waiter rt_waiter;
        struct futex_hash_bucket *hb;
        struct futex_q q = futex_q_init;
+       struct task_struct *exiting;
        int res, ret;
 
        if (!IS_ENABLED(CONFIG_FUTEX_PI))
@@ -2733,6 +2789,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int 
flags,
        }
 
 retry:
+       exiting = NULL;
        ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
        if (unlikely(ret != 0))
                goto out;
@@ -2740,7 +2797,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int 
flags,
 retry_private:
        hb = queue_lock(&q);
 
-       ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
+       ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0, 
&exiting);
        if (unlikely(ret)) {
                /*
                 * Atomic work succeeded and we got the lock,
@@ -2762,6 +2819,12 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int 
flags,
                         */
                        queue_unlock(hb);
                        put_futex_key(&q.key);
+
+                       if (exiting) {
+                               wait_bit(&exiting->flags, 3 /* PF_EXITPIDONE 
*/, TASK_UNINTERRUPTIBLE);
+                               put_task_struct(exiting);
+                       }
+
                        cond_resched();
                        goto retry;
                default:

Reply via email to