Hashtable for identifying inode by i_pos (cluster+dentry) have slow
search performance when there are lots of cached inode. The slowness
is from the limited count of slots showing that, in average,
(inode count / slots) times hlist traverse, and it becomes worse when
cached inode count increases by such as directory iteration.
To improve this, change from hash table to rb tree to minimize
searching/iterate time which enables O(logN) time complexity.

Test : "time ls -l"
 +--------+------------+------------+-----------+
 |        | file count | fresh read |   cached  |
 +--------+------------+------------+-----------+
 | Before |     50,000 |   0m06.59s |  0m01.58s |
 +--------+------------+------------+-----------+
 | After  |     50,000 |   0m05.20s |  0m00.98s |
 +--------+------------+------------+-----------+
 | Before |    300,000 |   1m28.97s |  0m31.69s |
 +--------+------------+------------+-----------+
 | After  |    300,000 |   0m33.11s |  0m06.21s |
 +--------+------------+------------+-----------+

Signed-off-by: Hyeongseok Kim <hyeongs...@gmail.com>
---
 fs/exfat/exfat_fs.h | 12 +++---
 fs/exfat/inode.c    | 91 +++++++++++++++++++++++++++++++--------------
 fs/exfat/namei.c    | 10 ++---
 fs/exfat/super.c    | 16 ++++----
 4 files changed, 82 insertions(+), 47 deletions(-)

diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 1d6da61157c9..f8ad8cbf8499 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -243,8 +243,8 @@ struct exfat_sb_info {
        struct nls_table *nls_io; /* Charset used for input and display */
        struct ratelimit_state ratelimit;
 
-       spinlock_t inode_hash_lock;
-       struct hlist_head inode_hashtable[EXFAT_HASH_SIZE];
+       spinlock_t inode_tree_lock; /* lock for inode_tree structure */
+       struct rb_root inode_tree;
 
        struct rcu_head rcu;
 };
@@ -289,8 +289,8 @@ struct exfat_inode_info {
        loff_t i_size_aligned;
        /* on-disk position of directory entry or 0 */
        loff_t i_pos;
-       /* hash by i_location */
-       struct hlist_node i_hash_fat;
+       /* tree by i_location */
+       struct rb_node rbnode;
        /* protect bmap against truncate */
        struct rw_semaphore truncate_lock;
        struct inode vfs_inode;
@@ -476,8 +476,8 @@ extern const struct inode_operations 
exfat_file_inode_operations;
 void exfat_sync_inode(struct inode *inode);
 struct inode *exfat_build_inode(struct super_block *sb,
                struct exfat_dir_entry *info, loff_t i_pos);
-void exfat_hash_inode(struct inode *inode, loff_t i_pos);
-void exfat_unhash_inode(struct inode *inode);
+void exfat_inode_tree_insert(struct inode *inode, loff_t i_pos);
+void exfat_inode_tree_erase(struct inode *inode);
 struct inode *exfat_iget(struct super_block *sb, loff_t i_pos);
 int exfat_write_inode(struct inode *inode, struct writeback_control *wbc);
 void exfat_evict_inode(struct inode *inode);
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 1803ef3220fd..740a34f528ae 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -501,50 +501,87 @@ static const struct address_space_operations exfat_aops = 
{
        .bmap           = exfat_aop_bmap
 };
 
-static inline unsigned long exfat_hash(loff_t i_pos)
+static struct exfat_inode_info *exfat_inode_tree_find(struct super_block *sb,
+                                                     loff_t i_pos)
 {
-       return hash_32(i_pos, EXFAT_HASH_BITS);
+       struct exfat_sb_info *sbi = EXFAT_SB(sb);
+       struct rb_node *node = sbi->inode_tree.rb_node;
+       struct exfat_inode_info *info;
+
+       spin_lock(&sbi->inode_tree_lock);
+       while (node) {
+               info = rb_entry(node, struct exfat_inode_info, rbnode);
+               WARN_ON(info->vfs_inode.i_sb != sb);
+
+               if (i_pos == info->i_pos) {
+                       spin_unlock(&sbi->inode_tree_lock);
+                       return info;
+               }
+
+               if (i_pos < info->i_pos)
+                       node = node->rb_left;
+               else /* i_pos > info->i_pos */
+                       node = node->rb_right;
+       }
+       spin_unlock(&sbi->inode_tree_lock);
+       return NULL;
 }
 
-void exfat_hash_inode(struct inode *inode, loff_t i_pos)
+void exfat_inode_tree_insert(struct inode *inode, loff_t i_pos)
 {
        struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
-       struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos);
+       struct exfat_inode_info *info, *ei = EXFAT_I(inode);
+       struct rb_root *root = &sbi->inode_tree;
+       struct rb_node **rb_ptr = &root->rb_node;
+       struct rb_node *parent = NULL;
+
+       spin_lock(&sbi->inode_tree_lock);
+       ei->i_pos = i_pos;
+       while (*rb_ptr) {
+               parent = *rb_ptr;
+               info = rb_entry(*rb_ptr, struct exfat_inode_info, rbnode);
+               if (i_pos == info->i_pos) {
+                       /* already exists */
+                       rb_replace_node(*rb_ptr, &ei->rbnode, &sbi->inode_tree);
+                       RB_CLEAR_NODE(*rb_ptr);
+                       spin_unlock(&sbi->inode_tree_lock);
+                       return;
+               }
 
-       spin_lock(&sbi->inode_hash_lock);
-       EXFAT_I(inode)->i_pos = i_pos;
-       hlist_add_head(&EXFAT_I(inode)->i_hash_fat, head);
-       spin_unlock(&sbi->inode_hash_lock);
+               if (i_pos < info->i_pos)
+                       rb_ptr = &(*rb_ptr)->rb_left;
+               else /* (i_pos > info->i_pos) */
+                       rb_ptr = &(*rb_ptr)->rb_right;
+       }
+
+       rb_link_node(&ei->rbnode, parent, rb_ptr);
+       rb_insert_color(&ei->rbnode, root);
+       spin_unlock(&sbi->inode_tree_lock);
 }
 
-void exfat_unhash_inode(struct inode *inode)
+void exfat_inode_tree_erase(struct inode *inode)
 {
        struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
+       struct exfat_inode_info *ei = EXFAT_I(inode);
+       struct rb_root *root = &sbi->inode_tree;
 
-       spin_lock(&sbi->inode_hash_lock);
-       hlist_del_init(&EXFAT_I(inode)->i_hash_fat);
-       EXFAT_I(inode)->i_pos = 0;
-       spin_unlock(&sbi->inode_hash_lock);
+       spin_lock(&sbi->inode_tree_lock);
+       if (!RB_EMPTY_NODE(&ei->rbnode)) {
+               rb_erase(&ei->rbnode, root);
+               RB_CLEAR_NODE(&ei->rbnode);
+       }
+       spin_unlock(&sbi->inode_tree_lock);
 }
 
 struct inode *exfat_iget(struct super_block *sb, loff_t i_pos)
 {
-       struct exfat_sb_info *sbi = EXFAT_SB(sb);
        struct exfat_inode_info *info;
-       struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos);
        struct inode *inode = NULL;
 
-       spin_lock(&sbi->inode_hash_lock);
-       hlist_for_each_entry(info, head, i_hash_fat) {
-               WARN_ON(info->vfs_inode.i_sb != sb);
-
-               if (i_pos != info->i_pos)
-                       continue;
+       info = exfat_inode_tree_find(sb, i_pos);
+       if (info)
                inode = igrab(&info->vfs_inode);
-               if (inode)
-                       break;
-       }
-       spin_unlock(&sbi->inode_hash_lock);
+
        return inode;
 }
 
@@ -634,7 +671,7 @@ struct inode *exfat_build_inode(struct super_block *sb,
                inode = ERR_PTR(err);
                goto out;
        }
-       exfat_hash_inode(inode, i_pos);
+       exfat_inode_tree_insert(inode, i_pos);
        insert_inode_hash(inode);
 out:
        return inode;
@@ -654,5 +691,5 @@ void exfat_evict_inode(struct inode *inode)
        invalidate_inode_buffers(inode);
        clear_inode(inode);
        exfat_cache_inval_inode(inode);
-       exfat_unhash_inode(inode);
+       exfat_inode_tree_erase(inode);
 }
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 24b41103d1cc..10ed9b35fd86 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -827,7 +827,7 @@ static int exfat_unlink(struct inode *dir, struct dentry 
*dentry)
        clear_nlink(inode);
        inode->i_mtime = inode->i_atime = current_time(inode);
        exfat_truncate_atime(&inode->i_atime);
-       exfat_unhash_inode(inode);
+       exfat_inode_tree_erase(inode);
        exfat_d_version_set(dentry, inode_query_iversion(dir));
 unlock:
        mutex_unlock(&EXFAT_SB(sb)->s_lock);
@@ -993,7 +993,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry 
*dentry)
        clear_nlink(inode);
        inode->i_mtime = inode->i_atime = current_time(inode);
        exfat_truncate_atime(&inode->i_atime);
-       exfat_unhash_inode(inode);
+       exfat_inode_tree_erase(inode);
        exfat_d_version_set(dentry, inode_query_iversion(dir));
 unlock:
        mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock);
@@ -1363,8 +1363,8 @@ static int exfat_rename(struct user_namespace *mnt_userns,
 
        i_pos = ((loff_t)EXFAT_I(old_inode)->dir.dir << 32) |
                (EXFAT_I(old_inode)->entry & 0xffffffff);
-       exfat_unhash_inode(old_inode);
-       exfat_hash_inode(old_inode, i_pos);
+       exfat_inode_tree_erase(old_inode);
+       exfat_inode_tree_insert(old_inode, i_pos);
        if (IS_DIRSYNC(new_dir))
                exfat_sync_inode(old_inode);
        else
@@ -1384,7 +1384,7 @@ static int exfat_rename(struct user_namespace *mnt_userns,
                mark_inode_dirty(old_dir);
 
        if (new_inode) {
-               exfat_unhash_inode(new_inode);
+               exfat_inode_tree_erase(new_inode);
 
                /* skip drop_nlink if new_inode already has been dropped */
                if (new_inode->i_nlink) {
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index d38d17a77e76..8f197432c57b 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -317,14 +317,12 @@ static int exfat_parse_param(struct fs_context *fc, 
struct fs_parameter *param)
        return 0;
 }
 
-static void exfat_hash_init(struct super_block *sb)
+static void exfat_inode_tree_init(struct super_block *sb)
 {
        struct exfat_sb_info *sbi = EXFAT_SB(sb);
-       int i;
 
-       spin_lock_init(&sbi->inode_hash_lock);
-       for (i = 0; i < EXFAT_HASH_SIZE; i++)
-               INIT_HLIST_HEAD(&sbi->inode_hashtable[i]);
+       spin_lock_init(&sbi->inode_tree_lock);
+       sbi->inode_tree = RB_ROOT;
 }
 
 static int exfat_read_root(struct inode *inode)
@@ -648,8 +646,8 @@ static int exfat_fill_super(struct super_block *sb, struct 
fs_context *fc)
                goto check_nls_io;
        }
 
-       /* set up enough so that it can read an inode */
-       exfat_hash_init(sb);
+       /* set up exfat inode tree */
+       exfat_inode_tree_init(sb);
 
        if (!strcmp(sbi->options.iocharset, "utf8"))
                opts->utf8 = 1;
@@ -683,7 +681,7 @@ static int exfat_fill_super(struct super_block *sb, struct 
fs_context *fc)
                goto put_inode;
        }
 
-       exfat_hash_inode(root_inode, EXFAT_I(root_inode)->i_pos);
+       exfat_inode_tree_insert(root_inode, EXFAT_I(root_inode)->i_pos);
        insert_inode_hash(root_inode);
 
        sb->s_root = d_make_root(root_inode);
@@ -786,7 +784,7 @@ static void exfat_inode_init_once(void *foo)
        ei->nr_caches = 0;
        ei->cache_valid_id = EXFAT_CACHE_VALID + 1;
        INIT_LIST_HEAD(&ei->cache_lru);
-       INIT_HLIST_NODE(&ei->i_hash_fat);
+       RB_CLEAR_NODE(&ei->rbnode);
        inode_init_once(&ei->vfs_inode);
 }
 
-- 
2.27.0.83.g0313f36

Reply via email to