Add a bloom filter to exclude entries that are not in `pwd->i_subdirs`, thereby improving the performance of `erofs_rebuild_get_dentry`. Below are the results for different # of files in the same directory:
+---------+--------------------+ | # files | time reduction (%) | +---------+--------------------+ | 1e4 | 55% | +---------+--------------------+ | 1e5 | 98% | +---------+--------------------+ | 2e5 | 98% | +---------+--------------------+ | 3e5 | 99% | +---------+--------------------+ Signed-off-by: Hongzhen Luo <hongz...@linux.alibaba.com> --- lib/rebuild.c | 61 ++++++++++++++++++++++++++++++++++++++++++++------- lib/super.c | 2 ++ mkfs/main.c | 4 ++++ 3 files changed, 59 insertions(+), 8 deletions(-) diff --git a/lib/rebuild.c b/lib/rebuild.c index 9e8bf8f..3fd3ea0 100644 --- a/lib/rebuild.c +++ b/lib/rebuild.c @@ -15,6 +15,7 @@ #include "erofs/xattr.h" #include "erofs/blobchunk.h" #include "erofs/internal.h" +#include "erofs/bloom_filter.h" #include "liberofs_uuid.h" #ifdef HAVE_LINUX_AUFS_TYPE_H @@ -62,14 +63,48 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd, char *path, bool aufs, bool *whout, bool *opq, bool to_head) { struct erofs_dentry *d = NULL; - unsigned int len = strlen(path); - char *s = path; + unsigned int len, p = 0; + char *s; + struct erofs_sb_info *sbi = pwd->sbi; + char buf[4096]; *whout = false; *opq = false; + s = pwd->i_srcpath; + len = strlen(pwd->i_srcpath); + /* normalize the pwd path, e.g., /./../xxx/yyy -> /xxx/yyy */ + buf[p++] = '/'; + while (s < pwd->i_srcpath + len) { + char *slash = memchr(s, '/', pwd->i_srcpath + len - s); + if (slash) { + if (s == slash) { + while(*++s == '/'); + continue;; + } + *slash = '\0'; + } + if (memcmp(s, ".", 2) && memcmp(s, "..", 3)) { + memcpy(buf + p, s, strlen(s)); + p += strlen(s); + buf[p++] = '/'; + + } + if (slash) { + *slash = '/'; + s = slash + 1; + } else{ + break; + } + } + if (buf[p - 1] != '/') + buf[p++] = '/'; + + s = path; + len = strlen(path); while (s < path + len) { char *slash = memchr(s, '/', path + len - s); + int bloom, slen; if (slash) { if (s == slash) { @@ -97,13 +132,19 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd, } } - list_for_each_entry(d, &pwd->i_subdirs, d_child) { - if (!strcmp(d->name, s)) { - if (d->type != EROFS_FT_DIR && slash) - return ERR_PTR(-EIO); - inode = d->inode; - break; + slen = strlen(s); + memcpy(buf + p, s, slen); + p += slen; + if ((bloom = erofs_bloom_test(sbi, buf, p)) > 0) { + list_for_each_entry(d, &pwd->i_subdirs, d_child) { + if (!strcmp(d->name, s)) { + if (d->type != EROFS_FT_DIR && slash) + return ERR_PTR(-EIO); + inode = d->inode; + break; + } } + } if (inode) { @@ -124,6 +165,10 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd, return d; pwd = d->inode; } + if (!bloom && erofs_bloom_add(sbi, buf, p)) + return ERR_PTR(-EINVAL); + if (slash) + buf[p++] = '/'; } if (slash) { *slash = '/'; diff --git a/lib/super.c b/lib/super.c index 45233c4..7289236 100644 --- a/lib/super.c +++ b/lib/super.c @@ -7,6 +7,7 @@ #include "erofs/print.h" #include "erofs/xattr.h" #include "erofs/cache.h" +#include "erofs/bloom_filter.h" static bool check_layout_compatibility(struct erofs_sb_info *sbi, struct erofs_super_block *dsb) @@ -153,6 +154,7 @@ void erofs_put_super(struct erofs_sb_info *sbi) erofs_buffer_exit(sbi->bmgr); sbi->bmgr = NULL; } + erofs_bloom_exit(sbi); } int erofs_writesb(struct erofs_sb_info *sbi, struct erofs_buffer_head *sb_bh, diff --git a/mkfs/main.c b/mkfs/main.c index 20f12fc..27a2ea0 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -31,6 +31,7 @@ #include "../lib/liberofs_private.h" #include "../lib/liberofs_uuid.h" #include "../lib/compressor.h" +#include "erofs/bloom_filter.h" static struct option long_options[] = { {"version", no_argument, 0, 'V'}, @@ -1344,6 +1345,9 @@ int main(int argc, char **argv) } erofs_inode_manager_init(); + srand(time(NULL)); + /* 1000000 should be enough */ + erofs_bloom_init(&g_sbi, 5, 1000000, rand()); if (tar_mode) { root = erofs_rebuild_make_root(&g_sbi); -- 2.43.5