3.12.30-rt40 + patch has been running your testcase, futextests and stockfish concurrently (on a 28 core +ht box) for over an hour now.
On Sat, 2014-10-25 at 20:20 -0400, Brian Silverman wrote: > free_pi_state and exit_pi_state_list both clean up futex_pi_state's. > exit_pi_state_list takes the hb lock first, and most callers of > free_pi_state do too. requeue_pi didn't, which means free_pi_state can > free the pi_state out from under exit_pi_state_list. For example: > task A | task B > exit_pi_state_list | > pi_state = | > curr->pi_state_list->next | > | futex_requeue(requeue_pi=1) > | // pi_state is the same as > | // the one in task A > | free_pi_state(pi_state) > | list_del_init(&pi_state->list) > | kfree(pi_state) > list_del_init(&pi_state->list) | > > Move the free_pi_state calls in requeue_pi to before it drops the hb > locks which it's already holding. > > I have test code that forks a bunch of processes, which all fork more > processes that do futex(FUTEX_WAIT_REQUEUE_PI), then do > futex(FUTEX_CMP_REQUEUE_PI) before killing the waiting processes. That > test consistently breaks QEMU VMs without this patch. > > Although they both make it faster to crash, CONFIG_PREEMPT_RT and lots > of CPUs are not necessary to reproduce this problem. The symptoms range > from random reboots to corrupting the test program to corrupting whole > filesystems to strange BIOS errors. Crashdumps (when they get created at > all) are usually a mess, to the point of crashing tools used to examine > them. > > Signed-off-by: Brian Silverman <bsilver16...@gmail.com> > --- > kernel/futex.c | 44 +++++++++++++++++++++++++++----------------- > 1 file changed, 27 insertions(+), 17 deletions(-) > > diff --git a/kernel/futex.c b/kernel/futex.c > index 815d7af..c2c35a5 100644 > --- a/kernel/futex.c > +++ b/kernel/futex.c > @@ -64,6 +64,7 @@ > #include <linux/hugetlb.h> > #include <linux/freezer.h> > #include <linux/bootmem.h> > +#include <linux/lockdep.h> > > #include <asm/futex.h> > > @@ -639,8 +640,16 @@ static struct futex_pi_state * alloc_pi_state(void) > return pi_state; > } > > -static void free_pi_state(struct futex_pi_state *pi_state) > +/** > + * Must be called with the hb lock held. > + */ > +static void free_pi_state(struct futex_pi_state *pi_state, > + struct futex_hash_bucket *hb) > { > + if (!pi_state) return; > + > + lockdep_assert_held(&hb->lock); > + > if (!atomic_dec_and_test(&pi_state->refcount)) > return; > > @@ -1519,15 +1528,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned > int flags, > } > > retry: > - if (pi_state != NULL) { > - /* > - * We will have to lookup the pi_state again, so free this one > - * to keep the accounting correct. > - */ > - free_pi_state(pi_state); > - pi_state = NULL; > - } > - > ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); > if (unlikely(ret != 0)) > goto out; > @@ -1558,6 +1558,12 @@ retry_private: > ret = get_futex_value_locked(&curval, uaddr1); > > if (unlikely(ret)) { > + /* > + * We will have to lookup the pi_state again, so > + * free this one to keep the accounting correct. > + */ > + free_pi_state(pi_state, hb2); > + pi_state = NULL; > double_unlock_hb(hb1, hb2); > hb_waiters_dec(hb2); > > @@ -1617,6 +1623,8 @@ retry_private: > case 0: > break; > case -EFAULT: > + free_pi_state(pi_state, hb2); > + pi_state = NULL; > double_unlock_hb(hb1, hb2); > hb_waiters_dec(hb2); > put_futex_key(&key2); > @@ -1632,6 +1640,8 @@ retry_private: > * exit to complete. > * - The user space value changed. > */ > + free_pi_state(pi_state, hb2); > + pi_state = NULL; > double_unlock_hb(hb1, hb2); > hb_waiters_dec(hb2); > put_futex_key(&key2); > @@ -1699,7 +1709,7 @@ retry_private: > } else if (ret) { > /* -EDEADLK */ > this->pi_state = NULL; > - free_pi_state(pi_state); > + free_pi_state(pi_state, hb2); > goto out_unlock; > } > } > @@ -1708,6 +1718,7 @@ retry_private: > } > > out_unlock: > + free_pi_state(pi_state, hb2); > double_unlock_hb(hb1, hb2); > hb_waiters_dec(hb2); > > @@ -1725,8 +1736,6 @@ out_put_keys: > out_put_key1: > put_futex_key(&key1); > out: > - if (pi_state != NULL) > - free_pi_state(pi_state); > return ret ? ret : task_count; > } > > @@ -1850,14 +1859,15 @@ retry: > * PI futexes can not be requeued and must remove themself from the > * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry > * and dropped here. > + * Must be called with the hb lock held. > */ > -static void unqueue_me_pi(struct futex_q *q) > +static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb) > __releases(q->lock_ptr) > { > __unqueue_futex(q); > > BUG_ON(!q->pi_state); > - free_pi_state(q->pi_state); > + free_pi_state(q->pi_state, hb); > q->pi_state = NULL; > > spin_unlock(q->lock_ptr); > @@ -2340,7 +2350,7 @@ retry_private: > rt_mutex_unlock(&q.pi_state->pi_mutex); > > /* Unqueue and drop the lock */ > - unqueue_me_pi(&q); > + unqueue_me_pi(&q, hb); > > goto out_put_key; > > @@ -2651,7 +2661,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, > unsigned int flags, > ret = (res < 0) ? res : 0; > > /* Unqueue and drop the lock. */ > - unqueue_me_pi(&q); > + unqueue_me_pi(&q, hb); > } > > /* -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/