Hi Xiang, Following up on our discussion regarding the multi-threaded fsck pipeline and the need to cache metadata to reduce bottlenecks, I noticed your TODO in lib/data.c about introducing a metabox cache, so I have put together this PoC. To keep it highly concurrent and thread-safe for the upcoming worker threads extracting pclusters, I modeled it directly after the bucketed, erofs_rwsem_t-protected approach used in lib/fragments.c.
Testing on an LZ4HC 4K EROFS image of the Linux 6.7 source tree showed a significant drop in I/O overhead: Baseline Extraction: 1.538s With Meta Cache PoC: 1.090s (~29% reduction) In this PoC, the cache currently grows without bound. Before turning this into a formal patch, I plan to add an LRU eviction policy to keep the memory footprint bounded on large images. I would love your thoughts on this approach and whether it aligns with your vision for the metadata caching prerequisite. Regards, Nithurshen On Wed, Mar 4, 2026 at 8:17 AM Nithurshen <[email protected]> wrote: > > This PoC introduces a thread-safe metadata cache to reduce redundant I/O > and decompression overhead during fsck extraction. It directly addresses > the TODO in erofs_bread by modeling a bucketed, rw-semaphore protected > cache after the existing fragment cache implementation. > > Baseline (LZ4HC 4K pclusters, Linux 6.7 tree): > Extraction time: 1.538s > > With Meta Cache PoC: > Extraction time: 1.090s (~29% reduction) > > Signed-off-by: Nithurshen <[email protected]> > --- > lib/data.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 81 insertions(+), 3 deletions(-) > > diff --git a/lib/data.c b/lib/data.c > index 6fd1389..bcd8d17 100644 > --- a/lib/data.c > +++ b/lib/data.c > @@ -9,6 +9,35 @@ > #include "erofs/trace.h" > #include "erofs/decompress.h" > #include "liberofs_fragments.h" > +#include "erofs/lock.h" > + > +#define META_HASHSIZE 65536 > +#define META_HASH(c) ((c) & (META_HASHSIZE - 1)) > + > +struct erofs_meta_bucket { > + struct list_head hash; > + erofs_rwsem_t lock; > +}; > + > +struct erofs_meta_item { > + struct list_head list; > + u64 key; > + char *data; > + int length; > +}; > + > +static struct erofs_meta_bucket meta_bks[META_HASHSIZE]; > +static bool meta_cache_inited = false; > + > +static void erofs_meta_cache_init(void) > +{ > + int i; > + for (i = 0; i < META_HASHSIZE; ++i) { > + init_list_head(&meta_bks[i].hash); > + 
erofs_init_rwsem(&meta_bks[i].lock); > + } > + meta_cache_inited = true; > +} > > void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap) > { > @@ -500,7 +529,56 @@ static void *erofs_read_metadata_bdi(struct > erofs_sb_info *sbi, > void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid, > erofs_off_t *offset, int *lengthp) > { > + u64 key = nid ? nid : *offset; > + struct erofs_meta_bucket *bk; > + struct erofs_meta_item *item; > + void *buffer = NULL; > + > + if (__erofs_unlikely(!meta_cache_inited)) > + erofs_meta_cache_init(); > + > + bk = &meta_bks[META_HASH(key)]; > + > + erofs_down_read(&bk->lock); > + list_for_each_entry(item, &bk->hash, list) { > + if (item->key == key) { > + buffer = malloc(item->length); > + if (buffer) { > + memcpy(buffer, item->data, item->length); > + *lengthp = item->length; > + *offset = round_up(*offset, 4); > + *offset += sizeof(__le16) + item->length; > + } > + break; > + } > + } > + erofs_up_read(&bk->lock); > + > + if (buffer) > + return buffer; > + > if (nid) > - return erofs_read_metadata_nid(sbi, nid, offset, lengthp); > - return erofs_read_metadata_bdi(sbi, offset, lengthp); > -} > + buffer = erofs_read_metadata_nid(sbi, nid, offset, lengthp); > + else > + buffer = erofs_read_metadata_bdi(sbi, offset, lengthp); > + > + if (IS_ERR(buffer)) > + return buffer; > + > + item = malloc(sizeof(*item)); > + if (item) { > + item->key = key; > + item->length = *lengthp; > + item->data = malloc(*lengthp); > + if (item->data) { > + memcpy(item->data, buffer, *lengthp); > + erofs_down_write(&bk->lock); > + list_add_tail(&item->list, &bk->hash); > + erofs_up_write(&bk->lock); > + } else { > + free(item); > + } > + } > + > + return buffer; > +} > \ No newline at end of file > -- > 2.51.0 >
