Don't use the standard mutex for the I/O operation lock, but rather
implement our own, as the standard mutex must be released by the same thread
that locked it.  This is a problem when it comes to doing async FetchData,
where the lock will be dropped from the workqueue that processed the
incoming data and not from the issuing thread.

Signed-off-by: David Howells <dhowe...@redhat.com>
cc: Marc Dionne <marc.dio...@auristor.com>
cc: linux-...@lists.infradead.org
---
 fs/afs/fs_operation.c | 111 +++++++++++++++++++++++++++++++++++++++---
 fs/afs/internal.h     |   3 +-
 fs/afs/super.c        |   2 +-
 3 files changed, 108 insertions(+), 8 deletions(-)

diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c
index 428721bbe4f6..0175d7a31332 100644
--- a/fs/afs/fs_operation.c
+++ b/fs/afs/fs_operation.c
@@ -49,6 +49,105 @@ struct afs_operation *afs_alloc_operation(struct key *key, 
struct afs_volume *vo
        return op;
 }
 
+struct afs_io_locker { /* Record of one task waiting for a vnode's I/O lock */
+       struct list_head        link;           /* Entry in vnode->io_lock_waiters */
+       struct task_struct      *task;          /* Task to wake when the lock is handed over */
+       unsigned long           have_lock;      /* Set by the unlocker once this waiter owns the lock */
+};
+
+/*
+ * Unlock the I/O lock on a vnode.
+ *
+ * If any task is queued on vnode->io_lock_waiters, ownership is handed
+ * directly to the first one: it is dequeued, its have_lock flag is set and it
+ * is woken.  AFS_VNODE_IO_LOCK stays set across that handover; the bit is only
+ * cleared when there is nobody waiting.  Nothing here depends on the calling
+ * task, so the lock may be dropped from a different thread to the one that
+ * took it.
+ */
+static void afs_unlock_for_io(struct afs_vnode *vnode)
+{
+       struct afs_io_locker *locker;
+       struct task_struct *waiter;
+
+       spin_lock(&vnode->lock);
+       locker = list_first_entry_or_null(&vnode->io_lock_waiters,
+                                         struct afs_io_locker, link);
+       if (locker) {
+               /* The locker record lives on the waiter's stack and the
+                * waiter is free to return as soon as it sees have_lock set,
+                * so fetch the task pointer before publishing the flag and
+                * don't touch *locker after the store.
+                *
+                * NOTE(review): this assumes the task_struct itself stays
+                * valid until wake_up_process() returns; if that can't be
+                * guaranteed here, pin it with get_task_struct().
+                */
+               waiter = locker->task;
+               list_del(&locker->link);
+               smp_store_release(&locker->have_lock, 1); /* The unlock barrier. */
+               smp_mb__after_atomic(); /* Store have_lock before task state */
+               wake_up_process(waiter);
+       } else {
+               clear_bit(AFS_VNODE_IO_LOCK, &vnode->flags);
+       }
+       spin_unlock(&vnode->lock);
+}
+
+/*
+ * Lock the I/O lock on a vnode uninterruptibly.  We can't use an ordinary
+ * mutex as lockdep will complain if we unlock it in the wrong thread.
+ *
+ * The lock is the AFS_VNODE_IO_LOCK bit in vnode->flags, manipulated under
+ * vnode->lock.  If the bit was clear, the caller sets it and owns the lock
+ * immediately.  Otherwise the caller queues an on-stack afs_io_locker on the
+ * tail of vnode->io_lock_waiters and sleeps until afs_unlock_for_io() hands
+ * the lock over by setting have_lock; the bit is left set across the handover.
+ */
+static void afs_lock_for_io(struct afs_vnode *vnode)
+{
+       struct afs_io_locker myself = { .task = current, }; /* have_lock starts out 0 */
+
+       spin_lock(&vnode->lock);
+
+       if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) { /* Was free; now ours */
+               spin_unlock(&vnode->lock);
+               return;
+       }
+
+       list_add_tail(&myself.link, &vnode->io_lock_waiters); /* The unlocker wakes the first entry */
+       spin_unlock(&vnode->lock);
+
+       for (;;) {
+               set_current_state(TASK_UNINTERRUPTIBLE); /* Set state before testing the flag so a wakeup isn't lost */
+               if (smp_load_acquire(&myself.have_lock)) /* The lock barrier */
+                       break;
+               schedule();
+       }
+       __set_current_state(TASK_RUNNING);
+}
+
+/*
+ * Lock the I/O lock on a vnode interruptibly.  We can't use an ordinary mutex
+ * as lockdep will complain if we unlock it in the wrong thread.
+ *
+ * Returns 0 with the lock held (always the case if the lock was uncontended),
+ * or -ERESTARTSYS without the lock if the caller had to wait and a signal was
+ * pending when the wait ended.  Acquisition and queueing work as in
+ * afs_lock_for_io(); the differences are that the sleep is TASK_INTERRUPTIBLE
+ * and that a pending signal also ends the wait.
+ */
+static int afs_lock_for_io_interruptible(struct afs_vnode *vnode)
+{
+       struct afs_io_locker myself = { .task = current, }; /* have_lock starts out 0 */
+       int ret = 0;
+
+       spin_lock(&vnode->lock);
+
+       if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) { /* Was free; now ours */
+               spin_unlock(&vnode->lock);
+               return 0;
+       }
+
+       list_add_tail(&myself.link, &vnode->io_lock_waiters); /* The unlocker wakes the first entry */
+       spin_unlock(&vnode->lock);
+
+       for (;;) {
+               set_current_state(TASK_INTERRUPTIBLE); /* Set state before testing so a wakeup isn't lost */
+               if (smp_load_acquire(&myself.have_lock) || /* The lock barrier */
+                   signal_pending(current))
+                       break;
+               schedule();
+       }
+       __set_current_state(TASK_RUNNING);
+
+       /* If we got a signal, try to transfer the lock onto the next
+        * waiter.  have_lock is rechecked under vnode->lock because the
+        * unlocker sets it while holding that lock: either we have already
+        * been handed the lock, in which case we pass it on by unlocking, or
+        * we are still queued and must take ourselves off the list.  Note
+        * that the lock is given up even if it was granted before the signal
+        * was noticed.
+        */
+       if (unlikely(signal_pending(current))) {
+               spin_lock(&vnode->lock);
+               if (myself.have_lock) {
+                       spin_unlock(&vnode->lock); /* afs_unlock_for_io() takes vnode->lock itself */
+                       afs_unlock_for_io(vnode);
+               } else {
+                       list_del(&myself.link);
+                       spin_unlock(&vnode->lock);
+               }
+               ret = -ERESTARTSYS;
+       }
+       return ret;
+}
+
 /*
  * Lock the vnode(s) being operated upon.
  */
@@ -60,7 +159,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
        _enter("");
 
        if (op->flags & AFS_OPERATION_UNINTR) {
-               mutex_lock(&vnode->io_lock);
+               afs_lock_for_io(vnode);
                op->flags |= AFS_OPERATION_LOCK_0;
                _leave(" = t [1]");
                return true;
@@ -72,7 +171,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
        if (vnode2 > vnode)
                swap(vnode, vnode2);
 
-       if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
+       if (afs_lock_for_io_interruptible(vnode) < 0) {
                afs_op_set_error(op, -ERESTARTSYS);
                op->flags |= AFS_OPERATION_STOP;
                _leave(" = f [I 0]");
@@ -81,10 +180,10 @@ static bool afs_get_io_locks(struct afs_operation *op)
        op->flags |= AFS_OPERATION_LOCK_0;
 
        if (vnode2) {
-               if (mutex_lock_interruptible_nested(&vnode2->io_lock, 1) < 0) {
+               if (afs_lock_for_io_interruptible(vnode2) < 0) {
                        afs_op_set_error(op, -ERESTARTSYS);
                        op->flags |= AFS_OPERATION_STOP;
-                       mutex_unlock(&vnode->io_lock);
+                       afs_unlock_for_io(vnode);
                        op->flags &= ~AFS_OPERATION_LOCK_0;
                        _leave(" = f [I 1]");
                        return false;
@@ -104,9 +203,9 @@ static void afs_drop_io_locks(struct afs_operation *op)
        _enter("");
 
        if (op->flags & AFS_OPERATION_LOCK_1)
-               mutex_unlock(&vnode2->io_lock);
+               afs_unlock_for_io(vnode2);
        if (op->flags & AFS_OPERATION_LOCK_0)
-               mutex_unlock(&vnode->io_lock);
+               afs_unlock_for_io(vnode);
 }
 
 static void afs_prepare_vnode(struct afs_operation *op, struct afs_vnode_param 
*vp,
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index c9d620175e80..07b8f7083e73 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -702,13 +702,14 @@ struct afs_vnode {
        struct afs_file_status  status;         /* AFS status info for this 
file */
        afs_dataversion_t       invalid_before; /* Child dentries are invalid 
before this */
        struct afs_permits __rcu *permit_cache; /* cache of permits so far 
obtained */
-       struct mutex            io_lock;        /* Lock for serialising I/O on 
this mutex */
+       struct list_head        io_lock_waiters; /* Threads waiting for the I/O 
lock */
        struct rw_semaphore     validate_lock;  /* lock for validating this 
vnode */
        struct rw_semaphore     rmdir_lock;     /* Lock for rmdir vs 
sillyrename */
        struct key              *silly_key;     /* Silly rename key */
        spinlock_t              wb_lock;        /* lock for wb_keys */
        spinlock_t              lock;           /* waitqueue/flags lock */
        unsigned long           flags;
+#define AFS_VNODE_IO_LOCK      0               /* Set if the I/O serialisation 
lock is held */
 #define AFS_VNODE_UNSET                1               /* set if vnode 
attributes not yet set */
 #define AFS_VNODE_DIR_VALID    2               /* Set if dir contents are 
valid */
 #define AFS_VNODE_ZAP_DATA     3               /* set if vnode's data should 
be invalidated */
diff --git a/fs/afs/super.c b/fs/afs/super.c
index f3ba1c3e72f5..7631302c1984 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -663,7 +663,7 @@ static void afs_i_init_once(void *_vnode)
 
        memset(vnode, 0, sizeof(*vnode));
        inode_init_once(&vnode->netfs.inode);
-       mutex_init(&vnode->io_lock);
+       INIT_LIST_HEAD(&vnode->io_lock_waiters);
        init_rwsem(&vnode->validate_lock);
        spin_lock_init(&vnode->wb_lock);
        spin_lock_init(&vnode->lock);

Reply via email to