Add a bloom filter to exclude entries that are not in `pwd->i_subdirs`, thereby improving the performance of `erofs_rebuild_get_dentry`. Below are the results for different # of files in the same directory:
+---------+--------------------+ | # files | time reduction (%) | +---------+--------------------+ | 1e4 | 55% | +---------+--------------------+ | 1e5 | 98% | +---------+--------------------+ | 2e5 | 98% | +---------+--------------------+ | 3e5 | 99% | +---------+--------------------+ Signed-off-by: Hongzhen Luo <hongz...@linux.alibaba.com> --- v2: Changes since v1: - move erofs_bloom_init() to erofs_super_init() - make the # hash function and bit array size of the bloom filter configurable v1: https://lore.kernel.org/all/20240718054025.427439-3-hongz...@linux.alibaba.com/ --- include/erofs/config.h | 2 ++ include/erofs/internal.h | 1 + lib/rebuild.c | 61 ++++++++++++++++++++++++++++++++++------ lib/super.c | 27 ++++++++++++++++++ mkfs/main.c | 20 +++++++++++++ 5 files changed, 103 insertions(+), 8 deletions(-) diff --git a/include/erofs/config.h b/include/erofs/config.h index f8726b5..c322c9a 100644 --- a/include/erofs/config.h +++ b/include/erofs/config.h @@ -92,6 +92,8 @@ struct erofs_configure { char *fs_config_file; char *block_list_file; #endif + u32 c_bloom_nrfuc; + unsigned long c_bloom_bitsize; }; extern struct erofs_configure cfg; diff --git a/include/erofs/internal.h b/include/erofs/internal.h index d3dd676..ef7dd2d 100644 --- a/include/erofs/internal.h +++ b/include/erofs/internal.h @@ -402,6 +402,7 @@ struct erofs_map_dev { }; /* super.c */ +int erofs_super_init(struct erofs_sb_info *sbi); int erofs_read_superblock(struct erofs_sb_info *sbi); void erofs_put_super(struct erofs_sb_info *sbi); int erofs_writesb(struct erofs_sb_info *sbi, struct erofs_buffer_head *sb_bh, diff --git a/lib/rebuild.c b/lib/rebuild.c index 9e8bf8f..3fd3ea0 100644 --- a/lib/rebuild.c +++ b/lib/rebuild.c @@ -15,6 +15,7 @@ #include "erofs/xattr.h" #include "erofs/blobchunk.h" #include "erofs/internal.h" +#include "erofs/bloom_filter.h" #include "liberofs_uuid.h" #ifdef HAVE_LINUX_AUFS_TYPE_H @@ -62,14 +63,48 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd, char *path, bool aufs, bool *whout, bool *opq, bool to_head) { struct erofs_dentry *d = NULL; - unsigned int len = strlen(path); - char *s = path; + unsigned int len, p = 0; + char *s; + struct erofs_sb_info *sbi = pwd->sbi; + char buf[4096]; *whout = false; *opq = false; + s = pwd->i_srcpath; + len = strlen(pwd->i_srcpath); + /* normalize the pwd path, e.g., /./../xxx/yyy -> /xxx/yyy */ + buf[p++] = '/'; + while (s < pwd->i_srcpath + len) { + char *slash = memchr(s, '/', pwd->i_srcpath + len - s); + if (slash) { + if (s == slash) { + while(*++s == '/'); + continue;; + } + *slash = '\0'; + } + if (memcmp(s, ".", 2) && memcmp(s, "..", 3)) { + memcpy(buf + p, s, strlen(s)); + p += strlen(s); + buf[p++] = '/'; + + } + if (slash) { + *slash = '/'; + s = slash + 1; + } else{ + break; + } + } + if (buf[p - 1] != '/') + buf[p++] = '/'; + + s = path; + len = strlen(path); while (s < path + len) { char *slash = memchr(s, '/', path + len - s); + int bloom, slen; if (slash) { if (s == slash) { @@ -97,13 +132,19 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd, } } - list_for_each_entry(d, &pwd->i_subdirs, d_child) { - if (!strcmp(d->name, s)) { - if (d->type != EROFS_FT_DIR && slash) - return ERR_PTR(-EIO); - inode = d->inode; - break; + slen = strlen(s); + memcpy(buf + p, s, slen); + p += slen; + if ((bloom = erofs_bloom_test(sbi, buf, p)) > 0) { + list_for_each_entry(d, &pwd->i_subdirs, d_child) { + if (!strcmp(d->name, s)) { + if (d->type != EROFS_FT_DIR && slash) + return ERR_PTR(-EIO); + inode = d->inode; + break; + } } + } if (inode) { @@ -124,6 +165,10 @@ struct erofs_dentry *erofs_rebuild_get_dentry(struct erofs_inode *pwd, return d; pwd = d->inode; } + if (!bloom && erofs_bloom_add(sbi, buf, p)) + return ERR_PTR(-EINVAL); + if (slash) + buf[p++] = '/'; } if (slash) { *slash = '/'; diff --git a/lib/super.c b/lib/super.c index 45233c4..39c3bfd 100644 --- a/lib/super.c +++ b/lib/super.c @@ -4,9 +4,14 @@ */ #include <string.h> #include <stdlib.h> +#include <time.h> #include "erofs/print.h" #include "erofs/xattr.h" #include "erofs/cache.h" +#include "erofs/bloom_filter.h" + +#define DEFAULT_BLOOM_NRFUNC 5 +#define DEFAULT_BLOOM_SIZE 1000000 static bool check_layout_compatibility(struct erofs_sb_info *sbi, struct erofs_super_block *dsb) @@ -72,6 +77,27 @@ static int erofs_init_devices(struct erofs_sb_info *sbi, return 0; } +int erofs_super_init(struct erofs_sb_info *sbi) +{ + int err; + + srand(time(NULL)); + if (cfg.c_bloom_nrfuc && cfg.c_bloom_bitsize) + err = erofs_bloom_init(sbi, cfg.c_bloom_nrfuc, + cfg.c_bloom_bitsize, rand()); + else + err = erofs_bloom_init(sbi, DEFAULT_BLOOM_NRFUNC, + DEFAULT_BLOOM_SIZE, rand()); + if (err) { + erofs_err("failed to init bloom filter"); + goto exit; + } + + err = 0; +exit: + return err; +} + int erofs_read_superblock(struct erofs_sb_info *sbi) { u8 data[EROFS_MAX_BLOCK_SIZE]; @@ -153,6 +179,7 @@ void erofs_put_super(struct erofs_sb_info *sbi) erofs_buffer_exit(sbi->bmgr); sbi->bmgr = NULL; } + erofs_bloom_exit(sbi); } int erofs_writesb(struct erofs_sb_info *sbi, struct erofs_buffer_head *sb_bh, diff --git a/mkfs/main.c b/mkfs/main.c index 20f12fc..21003bc 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -31,6 +31,7 @@ #include "../lib/liberofs_private.h" #include "../lib/liberofs_uuid.h" #include "../lib/compressor.h" +#include "erofs/bloom_filter.h" static struct option long_options[] = { {"version", no_argument, 0, 'V'}, @@ -81,6 +82,7 @@ static struct option long_options[] = { {"zfeature-bits", required_argument, NULL, 521}, {"clean", optional_argument, NULL, 522}, {"incremental", optional_argument, NULL, 523}, + {"bloom-filter", optional_argument, NULL, 524}, {0, 0, 0, 0}, }; @@ -179,6 +181,9 @@ static void usage(int argc, char **argv) " headerball=file data is omited in the source stream)\n" " --ovlfs-strip=<0,1> strip overlayfs metadata in the target image (e.g. whiteouts)\n" " --quiet quiet execution (do not write anything to standard output.)\n" + " --bloom-filter=X boost up images generation with bloom filter\n" + " (X=n,s; n=# of hash functions (default 5),\n" + " s=bit array size (default 1e6))\n" #ifndef NDEBUG " --random-pclusterblks randomize pclusterblks for big pcluster (debugging only)\n" " --random-algorithms randomize per-file algorithms (debugging only)\n" @@ -820,6 +825,20 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) } incremental_mode = (opt == 523); break; + case 524: + cfg.c_bloom_nrfuc = strtol(optarg, &endptr, 0); + if (*endptr != ',') { + erofs_err("invalid --bloom-filter=%s", optarg); + cfg.c_bloom_nrfuc = 0; + return -EINVAL; + } + cfg.c_bloom_bitsize = strtol(endptr + 1, &endptr, 0); + if (*endptr != '\0') { + erofs_err("invalid --bloom-filter=%s", optarg); + cfg.c_bloom_bitsize = 0; + return -EINVAL; + } + break; case 'V': version(); exit(0); @@ -1344,6 +1363,7 @@ int main(int argc, char **argv) } erofs_inode_manager_init(); + erofs_super_init(&g_sbi); if (tar_mode) { root = erofs_rebuild_make_root(&g_sbi); -- 2.43.5