On Fri, Dec 01, 2017 at 05:34:50PM -0500, Jason Baron wrote:
> hmmm...I'm not sure how this suggestion would change the locking rules
> from what we currently have. Right now, we use ep->lock, if we remove
> that and use ep->wq->lock instead, there is just a 1-to-1 mapping there
> that has not changed, since ep->wq->lock currently is completely not
> being used.

True.  The patch below survives the amazingly complex test of booting and
starting systemd with lockdep enabled.  Do we have something resembling an
epoll test suite?

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index afd548ebc328..2b2c5ac80e26 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -182,11 +182,10 @@ struct epitem {
  * This structure is stored inside the "private_data" member of the file
  * structure and represents the main data structure for the eventpoll
  * interface.
+ *
+ * Access to it is protected by the lock inside wq.
  */
 struct eventpoll {
-       /* Protect the access to this structure */
-       spinlock_t lock;
-
        /*
         * This mutex is used to ensure that files are not removed
         * while epoll is using them. This is held during the event
@@ -210,7 +209,7 @@ struct eventpoll {
        /*
         * This is a single linked list that chains all the "struct epitem" that
         * happened while transferring ready events to userspace w/out
-        * holding ->lock.
+        * holding ->wq.lock.
         */
        struct epitem *ovflist;
 
@@ -686,17 +685,17 @@ static int ep_scan_ready_list(struct eventpoll *ep,
         * because we want the "sproc" callback to be able to do it
         * in a lockless way.
         */
-       spin_lock_irqsave(&ep->lock, flags);
+       spin_lock_irqsave(&ep->wq.lock, flags);
        list_splice_init(&ep->rdllist, &txlist);
        ep->ovflist = NULL;
-       spin_unlock_irqrestore(&ep->lock, flags);
+       spin_unlock_irqrestore(&ep->wq.lock, flags);
 
        /*
         * Now call the callback function.
         */
        error = (*sproc)(ep, &txlist, priv);
 
-       spin_lock_irqsave(&ep->lock, flags);
+       spin_lock_irqsave(&ep->wq.lock, flags);
        /*
         * During the time we spent inside the "sproc" callback, some
         * other events might have been queued by the poll callback.
@@ -738,7 +737,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
                if (waitqueue_active(&ep->poll_wait))
                        pwake++;
        }
-       spin_unlock_irqrestore(&ep->lock, flags);
+       spin_unlock_irqrestore(&ep->wq.lock, flags);
 
        if (!ep_locked)
                mutex_unlock(&ep->mtx);
@@ -782,10 +781,10 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 
        rb_erase_cached(&epi->rbn, &ep->rbr);
 
-       spin_lock_irqsave(&ep->lock, flags);
+       spin_lock_irqsave(&ep->wq.lock, flags);
        if (ep_is_linked(&epi->rdllink))
                list_del_init(&epi->rdllink);
-       spin_unlock_irqrestore(&ep->lock, flags);
+       spin_unlock_irqrestore(&ep->wq.lock, flags);
 
        wakeup_source_unregister(ep_wakeup_source(epi));
        /*
@@ -1015,7 +1014,6 @@ static int ep_alloc(struct eventpoll **pep)
        if (unlikely(!ep))
                goto free_uid;
 
-       spin_lock_init(&ep->lock);
        mutex_init(&ep->mtx);
        init_waitqueue_head(&ep->wq);
        init_waitqueue_head(&ep->poll_wait);
@@ -1119,7 +1117,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
        struct eventpoll *ep = epi->ep;
        int ewake = 0;
 
-       spin_lock_irqsave(&ep->lock, flags);
+       spin_lock_irqsave(&ep->wq.lock, flags);
 
        ep_set_busy_poll_napi_id(epi);
 
@@ -1196,7 +1194,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
                pwake++;
 
 out_unlock:
-       spin_unlock_irqrestore(&ep->lock, flags);
+       spin_unlock_irqrestore(&ep->wq.lock, flags);
 
        /* We have to call this outside the lock */
        if (pwake)
@@ -1480,7 +1478,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
                goto error_remove_epi;
 
        /* We have to drop the new item inside our item list to keep track of it */
-       spin_lock_irqsave(&ep->lock, flags);
+       spin_lock_irqsave(&ep->wq.lock, flags);
 
        /* record NAPI ID of new item if present */
        ep_set_busy_poll_napi_id(epi);
@@ -1497,7 +1495,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
                        pwake++;
        }
 
-       spin_unlock_irqrestore(&ep->lock, flags);
+       spin_unlock_irqrestore(&ep->wq.lock, flags);
 
        atomic_long_inc(&ep->user->epoll_watches);
 
@@ -1523,10 +1521,10 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
         * list, since that is used/cleaned only inside a section bound by "mtx".
         * And ep_insert() is called with "mtx" held.
         */
-       spin_lock_irqsave(&ep->lock, flags);
+       spin_lock_irqsave(&ep->wq.lock, flags);
        if (ep_is_linked(&epi->rdllink))
                list_del_init(&epi->rdllink);
-       spin_unlock_irqrestore(&ep->lock, flags);
+       spin_unlock_irqrestore(&ep->wq.lock, flags);
 
        wakeup_source_unregister(ep_wakeup_source(epi));
 
@@ -1593,7 +1591,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
         * list, push it inside.
         */
        if (revents & event->events) {
-               spin_lock_irq(&ep->lock);
+               spin_lock_irq(&ep->wq.lock);
                if (!ep_is_linked(&epi->rdllink)) {
                        list_add_tail(&epi->rdllink, &ep->rdllist);
                        ep_pm_stay_awake(epi);
@@ -1604,7 +1602,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
                        if (waitqueue_active(&ep->poll_wait))
                                pwake++;
                }
-               spin_unlock_irq(&ep->lock);
+               spin_unlock_irq(&ep->wq.lock);
        }
 
        /* We have to call this outside the lock */
@@ -1754,7 +1752,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
                 * caller specified a non blocking operation.
                 */
                timed_out = 1;
-               spin_lock_irqsave(&ep->lock, flags);
+               spin_lock_irqsave(&ep->wq.lock, flags);
                goto check_events;
        }
 
@@ -1763,7 +1761,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
        if (!ep_events_available(ep))
                ep_busy_loop(ep, timed_out);
 
-       spin_lock_irqsave(&ep->lock, flags);
+       spin_lock_irqsave(&ep->wq.lock, flags);
 
        if (!ep_events_available(ep)) {
                /*
@@ -1805,11 +1803,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
                                break;
                        }
 
-                       spin_unlock_irqrestore(&ep->lock, flags);
+                       spin_unlock_irqrestore(&ep->wq.lock, flags);
                        if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
                                timed_out = 1;
 
-                       spin_lock_irqsave(&ep->lock, flags);
+                       spin_lock_irqsave(&ep->wq.lock, flags);
                }
 
                __remove_wait_queue(&ep->wq, &wait);
@@ -1819,7 +1817,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
        /* Is it worth to try to dig for events ? */
        eavail = ep_events_available(ep);
 
-       spin_unlock_irqrestore(&ep->lock, flags);
+       spin_unlock_irqrestore(&ep->wq.lock, flags);
 
        /*
         * Try to transfer events to user space. In case we get 0 events and

Reply via email to