This patch supports to readahead more blocks in erofs_readdir(), it can enhance readdir performance in large direcotry.
readdir test in a large directory which contains 12000 sub-files. files_per_second Before: 926385.54 After: 2380435.562 Meanwhile, let's introduces a new sysfs entry to control readahead bytes to provide more flexible policy for readahead of readdir(). - location: /sys/fs/erofs/<disk>/dir_ra_bytes - default value: 16384 - disable readahead: set the value to 0 Signed-off-by: Chao Yu <c...@kernel.org> --- v4: - clean up codes and comments Documentation/ABI/testing/sysfs-fs-erofs | 8 ++++++++ fs/erofs/dir.c | 14 ++++++++++++++ fs/erofs/internal.h | 4 ++++ fs/erofs/super.c | 2 ++ fs/erofs/sysfs.c | 2 ++ 5 files changed, 30 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-erofs b/Documentation/ABI/testing/sysfs-fs-erofs index bf3b6299c15e..85fa56ca092c 100644 --- a/Documentation/ABI/testing/sysfs-fs-erofs +++ b/Documentation/ABI/testing/sysfs-fs-erofs @@ -35,3 +35,11 @@ Description: Used to set or show hardware accelerators in effect and multiple accelerators are separated by '\n'. Supported accelerator(s): qat_deflate. Disable all accelerators with an empty string (echo > accel). + +What: /sys/fs/erofs/<disk>/dir_ra_bytes +Date: July 2025 +Contact: "Chao Yu" <c...@kernel.org> +Description: Used to set or show readahead bytes during readdir(), by + default the value is 16384. + + - 0: disable readahead. diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 3e4b38bec0aa..99745c272b60 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -47,8 +47,12 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct inode *dir = file_inode(f); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct super_block *sb = dir->i_sb; + struct file_ra_state *ra = &f->f_ra; unsigned long bsz = sb->s_blocksize; unsigned int ofs = erofs_blkoff(sb, ctx->pos); + pgoff_t ra_pages = DIV_ROUND_UP_POW2( + EROFS_I_SB(dir)->dir_ra_bytes, PAGE_SIZE); + pgoff_t nr_pages = DIV_ROUND_UP_POW2(dir->i_size, PAGE_SIZE); int err = 0; bool initial = true; @@ -63,6 +67,16 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) break; } + /* readahead blocks to enhance performance in large directory */ + if (ra_pages) { + pgoff_t idx = DIV_ROUND_UP(ctx->pos, PAGE_SIZE); + pgoff_t pages = min(nr_pages - idx, ra_pages); + + if (pages > 1 && !ra_has_index(ra, idx)) + page_cache_sync_readahead(dir->i_mapping, ra, + f, idx, pages); + } + de = erofs_bread(&buf, dbstart, true); if (IS_ERR(de)) { erofs_err(sb, "failed to readdir of logical block %llu of nid %llu", diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 0d19bde8c094..8b1372521790 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -157,6 +157,7 @@ struct erofs_sb_info { /* sysfs support */ struct kobject s_kobj; /* /sys/fs/erofs/<devname> */ struct completion s_kobj_unregister; + erofs_off_t dir_ra_bytes; /* fscache support */ struct fscache_volume *volume; @@ -238,6 +239,9 @@ EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER) #define EROFS_I_BL_XATTR_BIT (BITS_PER_LONG - 1) #define EROFS_I_BL_Z_BIT (BITS_PER_LONG - 2) +/* default readahead size of directories */ +#define EROFS_DIR_RA_BYTES 16384 + struct erofs_inode { erofs_nid_t nid; diff --git a/fs/erofs/super.c b/fs/erofs/super.c index e1e9f06e8342..38fc4813a896 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -715,6 +715,8 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; + sbi->dir_ra_bytes = EROFS_DIR_RA_BYTES; + erofs_info(sb, "mounted with root inode @ nid %llu.", sbi->root_nid); return 0; } diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c index eed8797a193f..9d9f820a5621 100644 --- a/fs/erofs/sysfs.c +++ b/fs/erofs/sysfs.c @@ -65,12 +65,14 @@ EROFS_ATTR_FUNC(drop_caches, 0200); #ifdef CONFIG_EROFS_FS_ZIP_ACCEL EROFS_ATTR_FUNC(accel, 0644); #endif +EROFS_ATTR_RW_UI(dir_ra_bytes, erofs_sb_info); static struct attribute *erofs_sb_attrs[] = { #ifdef CONFIG_EROFS_FS_ZIP ATTR_LIST(sync_decompress), ATTR_LIST(drop_caches), #endif + ATTR_LIST(dir_ra_bytes), NULL, }; ATTRIBUTE_GROUPS(erofs_sb); -- 2.49.0