Now that i_count is a proper reference count, such that 0 means free
or freeing, and all .destroy_inode methods use RCU to free inodes, we
can trivially convert the inode hash to RCU and do RCU lookups.

So provide RCU variants of find_inode() and find_inode_fast(), in case
we do hit an inode with i_count==0, we fall back to the old code that
does a __wait_for_freeing_inode().

_However_ this makes the situation with using i_count() for decisions
far worse; those few that are in the evict/free path seem safe, the
rest is up for grabs.

Even if the remaining call sites were previously correct, they probably
are no longer and will need attention.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 fs/inode.c |  104 ++++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 76 insertions(+), 28 deletions(-)

--- a/fs/inode.c
+++ b/fs/inode.c
@@ -471,7 +471,7 @@ void __insert_inode_hash(struct inode *i
 
        spin_lock(&inode_hash_lock);
        spin_lock(&inode->i_lock);
-       hlist_add_head(&inode->i_hash, b);
+       hlist_add_head_rcu(&inode->i_hash, b);
        spin_unlock(&inode->i_lock);
        spin_unlock(&inode_hash_lock);
 }
@@ -487,7 +487,7 @@ void __remove_inode_hash(struct inode *i
 {
        spin_lock(&inode_hash_lock);
        spin_lock(&inode->i_lock);
-       hlist_del_init(&inode->i_hash);
+       hlist_del_init_rcu(&inode->i_hash);
        spin_unlock(&inode->i_lock);
        spin_unlock(&inode_hash_lock);
 }
@@ -777,15 +777,15 @@ long prune_icache_sb(struct super_block
 }
 
 static void __wait_on_freeing_inode(struct inode *inode);
-/*
- * Called with the inode lock held.
- */
-static struct inode *find_inode(struct super_block *sb,
+
+static struct inode *__find_inode(struct super_block *sb,
                                struct hlist_head *head,
                                int (*test)(struct inode *, void *),
                                void *data)
 {
-       struct inode *inode = NULL;
+       struct inode *inode;
+
+       lockdep_assert_held(&inode_hash_lock);
 
 repeat:
        hlist_for_each_entry(inode, head, i_hash) {
@@ -805,14 +805,44 @@ static struct inode *find_inode(struct s
        return NULL;
 }
 
+static struct inode *find_inode(struct super_block *sb,
+                               struct hlist_head *head,
+                               int (*test)(struct inode *, void *),
+                               void *data)
+{
+       struct inode *inode;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(inode, head, i_hash) {
+               if (inode->i_sb != sb)
+                       continue;
+               if (!test(inode, data))
+                       continue;
+               if (atomic_inc_not_zero(&inode->i_count))
+                       goto out_unlock;
+               goto slow;
+       }
+       inode = NULL;
+out_unlock:
+       rcu_read_unlock();
+       return inode;
+
+slow:
+       rcu_read_unlock();
+       spin_lock(&inode_hash_lock);
+       inode = __find_inode(sb, head, test, data);
+       spin_unlock(&inode_hash_lock);
+       return inode;
+}
+
 /*
- * find_inode_fast is the fast path version of find_inode, see the comment at
+ * __find_inode_fast is the fast path version of __find_inode, see the comment at
  * iget_locked for details.
  */
-static struct inode *find_inode_fast(struct super_block *sb,
-                               struct hlist_head *head, unsigned long ino)
+static struct inode *__find_inode_fast(struct super_block *sb,
+               struct hlist_head *head, unsigned long ino)
 {
-       struct inode *inode = NULL;
+       struct inode *inode;
 
        lockdep_assert_held(&inode_hash_lock);
 
@@ -834,6 +864,34 @@ static struct inode *find_inode_fast(str
        return NULL;
 }
 
+static struct inode *find_inode_fast(struct super_block *sb,
+               struct hlist_head *head, unsigned long ino)
+{
+       struct inode *inode;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(inode, head, i_hash) {
+               if (inode->i_ino != ino)
+                       continue;
+               if (inode->i_sb != sb)
+                       continue;
+               if (atomic_inc_not_zero(&inode->i_count))
+                       goto out_unlock;
+               goto slow;
+       }
+       inode = NULL;
+out_unlock:
+       rcu_read_unlock();
+       return inode;
+
+slow:
+       rcu_read_unlock();
+       spin_lock(&inode_hash_lock);
+       inode = __find_inode_fast(sb, head, ino);
+       spin_unlock(&inode_hash_lock);
+       return inode;
+}
+
 /*
  * Each cpu owns a range of LAST_INO_BATCH numbers.
  * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
@@ -1026,10 +1084,7 @@ struct inode *iget5_locked(struct super_
        struct hlist_head *head = inode_hashtable + hash(sb, hashval);
        struct inode *inode;
 again:
-       spin_lock(&inode_hash_lock);
        inode = find_inode(sb, head, test, data);
-       spin_unlock(&inode_hash_lock);
-
        if (inode) {
                wait_on_inode(inode);
                if (unlikely(inode_unhashed(inode))) {
@@ -1045,14 +1100,14 @@ struct inode *iget5_locked(struct super_
 
                spin_lock(&inode_hash_lock);
                /* We released the lock, so.. */
-               old = find_inode(sb, head, test, data);
+               old = __find_inode(sb, head, test, data);
                if (!old) {
                        if (set(inode, data))
                                goto set_failed;
 
                        spin_lock(&inode->i_lock);
                        inode->i_state = I_NEW;
-                       hlist_add_head(&inode->i_hash, head);
+                       hlist_add_head_rcu(&inode->i_hash, head);
                        spin_unlock(&inode->i_lock);
                        inode_sb_list_add(inode);
                        spin_unlock(&inode_hash_lock);
@@ -1104,9 +1159,7 @@ struct inode *iget_locked(struct super_b
        struct hlist_head *head = inode_hashtable + hash(sb, ino);
        struct inode *inode;
 again:
-       spin_lock(&inode_hash_lock);
        inode = find_inode_fast(sb, head, ino);
-       spin_unlock(&inode_hash_lock);
        if (inode) {
                wait_on_inode(inode);
                if (unlikely(inode_unhashed(inode))) {
@@ -1122,12 +1175,12 @@ struct inode *iget_locked(struct super_b
 
                spin_lock(&inode_hash_lock);
                /* We released the lock, so.. */
-               old = find_inode_fast(sb, head, ino);
+               old = __find_inode_fast(sb, head, ino);
                if (!old) {
                        inode->i_ino = ino;
                        spin_lock(&inode->i_lock);
                        inode->i_state = I_NEW;
-                       hlist_add_head(&inode->i_hash, head);
+                       hlist_add_head_rcu(&inode->i_hash, head);
                        spin_unlock(&inode->i_lock);
                        inode_sb_list_add(inode);
                        spin_unlock(&inode_hash_lock);
@@ -1258,9 +1311,7 @@ struct inode *ilookup5_nowait(struct sup
        struct hlist_head *head = inode_hashtable + hash(sb, hashval);
        struct inode *inode;
 
-       spin_lock(&inode_hash_lock);
        inode = find_inode(sb, head, test, data);
-       spin_unlock(&inode_hash_lock);
 
        return inode;
 }
@@ -1313,10 +1364,7 @@ struct inode *ilookup(struct super_block
        struct hlist_head *head = inode_hashtable + hash(sb, ino);
        struct inode *inode;
 again:
-       spin_lock(&inode_hash_lock);
        inode = find_inode_fast(sb, head, ino);
-       spin_unlock(&inode_hash_lock);
-
        if (inode) {
                wait_on_inode(inode);
                if (unlikely(inode_unhashed(inode))) {
@@ -1345,7 +1393,7 @@ EXPORT_SYMBOL(ilookup);
  * the inode_hash_lock spinlock held.
  *
  * This is a even more generalized version of ilookup5() when the
- * function must never block --- find_inode() can block in
+ * function must never block --- __find_inode() can block in
  * __wait_on_freeing_inode() --- or when the caller can not increment
  * the reference count because the resulting iput() might cause an
  * inode eviction.  The tradeoff is that the @match funtion must be
@@ -1402,7 +1450,7 @@ int insert_inode_locked(struct inode *in
                if (likely(!old)) {
                        spin_lock(&inode->i_lock);
                        inode->i_state |= I_NEW;
-                       hlist_add_head(&inode->i_hash, head);
+                       hlist_add_head_rcu(&inode->i_hash, head);
                        spin_unlock(&inode->i_lock);
                        spin_unlock(&inode_hash_lock);
                        return 0;
@@ -1445,7 +1493,7 @@ int insert_inode_locked4(struct inode *i
                if (likely(!old)) {
                        spin_lock(&inode->i_lock);
                        inode->i_state |= I_NEW;
-                       hlist_add_head(&inode->i_hash, head);
+                       hlist_add_head_rcu(&inode->i_hash, head);
                        spin_unlock(&inode->i_lock);
                        spin_unlock(&inode_hash_lock);
                        return 0;


Reply via email to