2.6.32-longterm review patch.  If anyone has any objections, please let me know.

------------------

From: Oleg Nesterov <o...@redhat.com>

commit 971316f0503a5c50633d07b83b6db2f15a3a5b00 upstream.

signalfd_cleanup() ensures that ->signalfd_wqh is not used, but
this is not enough. eppoll_entry->whead still points to the memory
we are going to free, ep_unregister_pollwait()->remove_wait_queue()
is obviously unsafe.

Change ep_poll_callback(POLLFREE) to set eppoll_entry->whead = NULL,
change ep_unregister_pollwait() to check pwq->whead != NULL under
rcu_read_lock() before remove_wait_queue(). We add the new helper,
ep_remove_wait_queue(), for this.

This works because sighand_cachep is SLAB_DESTROY_BY_RCU and because
->signalfd_wqh is initialized in sighand_ctor(), not in copy_sighand().
ep_unregister_pollwait()->remove_wait_queue() can play with already
freed and potentially reused ->sighand, but this is fine. This memory
must have the valid ->signalfd_wqh until rcu_read_unlock().

Reported-by: Maxime Bizon <mbi...@freebox.fr>
Signed-off-by: Oleg Nesterov <o...@redhat.com>
Signed-off-by: Linus Torvalds <torva...@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gre...@linuxfoundation.org>
Signed-off-by: Willy Tarreau <w...@1wt.eu>
---
 fs/eventpoll.c |   30 +++++++++++++++++++++++++++---
 fs/signalfd.c  |    6 +++++-
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 15a7ef3..42f2c12 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -300,6 +300,11 @@ static inline int ep_is_linked(struct list_head *p)
        return !list_empty(p);
 }
 
+static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
+{
+       return container_of(p, struct eppoll_entry, wait);
+}
+
 /* Get the "struct epitem" from a wait queue pointer */
 static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
 {
@@ -434,6 +439,18 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
        put_cpu();
 }
 
+static void ep_remove_wait_queue(struct eppoll_entry *pwq)
+{
+       wait_queue_head_t *whead;
+
+       rcu_read_lock();
+       /* If it is cleared by POLLFREE, it should be rcu-safe */
+       whead = rcu_dereference(pwq->whead);
+       if (whead)
+               remove_wait_queue(whead, &pwq->wait);
+       rcu_read_unlock();
+}
+
 /*
  * This function unregisters poll callbacks from the associated file
  * descriptor.  Must be called with "mtx" held (or "epmutex" if called from
@@ -448,7 +465,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
                pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
 
                list_del(&pwq->llink);
-               remove_wait_queue(pwq->whead, &pwq->wait);
+               ep_remove_wait_queue(pwq);
                kmem_cache_free(pwq_cache, pwq);
        }
 }
@@ -814,9 +831,16 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
        struct epitem *epi = ep_item_from_wait(wait);
        struct eventpoll *ep = epi->ep;
 
-       /* the caller holds eppoll_entry->whead->lock */
-       if ((unsigned long)key & POLLFREE)
+       if ((unsigned long)key & POLLFREE) {
+               ep_pwq_from_wait(wait)->whead = NULL;
+               /*
+                * whead = NULL above can race with ep_remove_wait_queue()
+                * which can do another remove_wait_queue() after us, so we
+                * can't use __remove_wait_queue(). whead->lock is held by
+                * the caller.
+                */
                list_del_init(&wait->task_list);
+       }
 
        spin_lock_irqsave(&ep->lock, flags);
 
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 6339cb4..02c25d7 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -32,7 +32,11 @@
 void signalfd_cleanup(struct sighand_struct *sighand)
 {
        wait_queue_head_t *wqh = &sighand->signalfd_wqh;
-
+       /*
+        * The lockless check can race with remove_wait_queue() in progress,
+        * but in this case its caller should run under rcu_read_lock() and
+        * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return.
+        */
        if (likely(!waitqueue_active(wqh)))
                return;
 
-- 
1.7.2.1.45.g54fbc



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to