Unlike Yiyan's previous version [1], this patch just uses
a tmpfile to keep all decompressed data for fragments.

Dataset: linux 5.4.140
mkfs.erofs command line:
        mkfs.erofs -zlzma -C131072 -T0 -Eall-fragments,fragdedupe=inode foo.erofs <dir>
Test command line:
        hyperfine -p "echo 3 > /proc/sys/vm/drop_caches; sleep 1" "fsck/fsck.erofs --extract foo.erofs"

Vanilla:
  Time (mean ± σ):     362.309 s ±  0.406 s   [User: 360.298 s, System: 0.956 s]

After:
  Time (mean ± σ):     20.880 s ±  0.026 s    [User: 19.751 s, System: 1.058 s]

An in-memory LRU cache could also be implemented later to meet
different needs.

[1] https://lore.kernel.org/r/20231023071528.1912105-1-lyy0...@sjtu.edu.cn

Cc: Li Yiyan <lyy0...@sjtu.edu.cn>
Signed-off-by: Gao Xiang <hsiang...@linux.alibaba.com>
---
changes since v1:
 - fix CI error: https://github.com/erofs/erofsnightly/actions/runs/12840095401/job/35808271964
fragments.c: In function ‘erofs_packedfile_preload’:
fragments.c:489:25: error: ignoring return value of ‘ftruncate’ declared with attribute ‘warn_unused_result’ [-Werror=unused-result]
  489 |                         ftruncate(fileno(epi->file), 0);
      |                         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
cc1: all warnings being treated as errors
   Here ftruncate() is only a best-effort attempt to release the used
   space on ENOSPC, so its result is deliberately ignored.

 fsck/main.c               |  12 ++-
 fuse/main.c               |  16 +++-
 include/erofs/fragments.h |   3 +
 lib/data.c                |  14 +--
 lib/fragments.c           | 192 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 222 insertions(+), 15 deletions(-)

diff --git a/fsck/main.c b/fsck/main.c
index f56a812..d375835 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -16,6 +16,7 @@
 #include "erofs/dir.h"
 #include "erofs/xattr.h"
 #include "../lib/compressor.h"
+#include "erofs/fragments.h"
 
 static int erofsfsck_check_inode(erofs_nid_t pnid, erofs_nid_t nid);
 
@@ -1079,10 +1080,17 @@ int main(int argc, char *argv[])
                erofsfsck_hardlink_init();
 
        if (erofs_sb_has_fragments(&g_sbi) && g_sbi.packed_nid > 0) {
+               err = erofs_packedfile_init(&g_sbi, false);
+               if (err) {
+                       erofs_err("failed to initialize packedfile: %s",
+                                 erofs_strerror(err));
+                       goto exit_hardlink;
+               }
+
                err = erofsfsck_check_inode(g_sbi.packed_nid, g_sbi.packed_nid);
                if (err) {
                        erofs_err("failed to verify packed file");
-                       goto exit_hardlink;
+                       goto exit_packedinode;
                }
        }
 
@@ -1108,6 +1116,8 @@ int main(int argc, char *argv[])
                }
        }
 
+exit_packedinode:
+       erofs_packedfile_exit(&g_sbi);
 exit_hardlink:
        if (fsckcfg.extract_path)
                erofsfsck_hardlink_exit();
diff --git a/fuse/main.c b/fuse/main.c
index f6c04e8..cb2759e 100644
--- a/fuse/main.c
+++ b/fuse/main.c
@@ -12,6 +12,7 @@
 #include "erofs/print.h"
 #include "erofs/dir.h"
 #include "erofs/inode.h"
+#include "erofs/fragments.h"
 
 #include <float.h>
 #include <fuse.h>
@@ -688,11 +689,20 @@ int main(int argc, char *argv[])
                goto err_dev_close;
        }
 
+       if (erofs_sb_has_fragments(&g_sbi) && g_sbi.packed_nid > 0) {
+               ret = erofs_packedfile_init(&g_sbi, false);
+               if (ret) {
+                       erofs_err("failed to initialize packedfile: %s",
+                                 erofs_strerror(ret));
+                       goto err_super_put;
+               }
+       }
+
 #if FUSE_MAJOR_VERSION >= 3
        se = fuse_session_new(&args, &erofsfuse_lops, sizeof(erofsfuse_lops),
                              NULL);
        if (!se)
-               goto err_super_put;
+               goto err_packedinode;
 
        if (fuse_session_mount(se, opts.mountpoint) >= 0) {
                EROFSFUSE_MOUNT_MSG
@@ -722,7 +732,7 @@ int main(int argc, char *argv[])
 #else
        ch = fuse_mount(opts.mountpoint, &args);
        if (!ch)
-               goto err_super_put;
+               goto err_packedinode;
        EROFSFUSE_MOUNT_MSG
        se = fuse_lowlevel_new(&args, &erofsfuse_lops, sizeof(erofsfuse_lops),
                               NULL);
@@ -743,6 +753,8 @@ int main(int argc, char *argv[])
        fuse_unmount(opts.mountpoint, ch);
 #endif
 
+err_packedinode:
+       erofs_packedfile_exit(&g_sbi);
 err_super_put:
        erofs_put_super(&g_sbi);
 err_dev_close:
diff --git a/include/erofs/fragments.h b/include/erofs/fragments.h
index e92b7c7..14a1b4a 100644
--- a/include/erofs/fragments.h
+++ b/include/erofs/fragments.h
@@ -27,6 +27,9 @@ FILE *erofs_packedfile(struct erofs_sb_info *sbi);
 int erofs_packedfile_init(struct erofs_sb_info *sbi, bool fragments_mkfs);
 void erofs_packedfile_exit(struct erofs_sb_info *sbi);
 
+int erofs_packedfile_read(struct erofs_sb_info *sbi,
+                         void *buf, erofs_off_t len, erofs_off_t pos);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/data.c b/lib/data.c
index 8033208..fd9c21a 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -8,6 +8,7 @@
 #include "erofs/internal.h"
 #include "erofs/trace.h"
 #include "erofs/decompress.h"
+#include "erofs/fragments.h"
 
 static int erofs_map_blocks_flatmode(struct erofs_inode *inode,
                                     struct erofs_map_blocks *map,
@@ -248,18 +249,7 @@ int z_erofs_read_one_data(struct erofs_inode *inode,
        int ret = 0;
 
        if (map->m_flags & EROFS_MAP_FRAGMENT) {
-               struct erofs_inode packed_inode = {
-                       .sbi = sbi,
-                       .nid = sbi->packed_nid,
-               };
-
-               ret = erofs_read_inode_from_disk(&packed_inode);
-               if (ret) {
-                       erofs_err("failed to read packed inode from disk");
-                       return ret;
-               }
-
-               return erofs_pread(&packed_inode, buffer, length - skip,
+               return erofs_packedfile_read(sbi, buffer, length - skip,
                                   inode->fragmentoff + skip);
        }
 
diff --git a/lib/fragments.c b/lib/fragments.c
index 43cebe0..6eaf5b6 100644
--- a/lib/fragments.c
+++ b/lib/fragments.c
@@ -24,6 +24,7 @@
 #include "erofs/print.h"
 #include "erofs/internal.h"
 #include "erofs/fragments.h"
+#include "erofs/bitops.h"
 
 struct erofs_fragment_dedupe_item {
        struct list_head        list;
@@ -40,6 +41,11 @@ struct erofs_fragment_dedupe_item {
+/* Packed file state, reached through sbi->packedinode */
 struct erofs_packed_inode {
+       /* hash buckets of dedupe items (FRAGMENT_HASHSIZE buckets are walked on exit) */
        struct list_head *hash;
+       /* backing stream; also accessed through fileno() with pread()/pwrite() */
        FILE *file;
+       /* bitmap with one bit per packed-file block already written to @file */
+       unsigned long *uptodate;
+#if EROFS_MT_ENABLED
+       /* taken around preloading of blocks that are not up to date yet */
+       pthread_mutex_t mutex;
+#endif
+       /* size of @uptodate in bytes */
+       unsigned int uptodate_size;
 };
 
 const char *erofs_frags_packedname = "packed_file";
@@ -340,6 +346,9 @@ void erofs_packedfile_exit(struct erofs_sb_info *sbi)
        if (!epi)
                return;
 
+       if (epi->uptodate)
+               free(epi->uptodate);
+
        if (epi->hash) {
                for (i = 0; i < FRAGMENT_HASHSIZE; ++i)
                        list_for_each_entry_safe(di, n, &epi->hash[i], list)
@@ -386,9 +395,192 @@ int erofs_packedfile_init(struct erofs_sb_info *sbi, bool fragments_mkfs)
                err = -errno;
                goto err_out;
        }
+
+       /*
+        * When the image carries a packed inode, read it to learn its size,
+        * seek the backing stream to that size and allocate the bitmap which
+        * tracks the blocks already stored in the backing file.
+        */
+       if (erofs_sb_has_fragments(sbi) && sbi->packed_nid > 0) {
+               struct erofs_inode ei = {
+                       .sbi = sbi,
+                       .nid = sbi->packed_nid,
+               };
+               erofs_off_t nblks;
+
+               err = erofs_read_inode_from_disk(&ei);
+               if (err) {
+                       /* report the returned code; errno may be unrelated */
+                       erofs_err("failed to read packed inode from disk: %s",
+                                 erofs_strerror(err));
+                       goto err_out;
+               }
+
+               err = fseek(epi->file, ei.i_size, SEEK_SET);
+               if (err) {
+                       err = -errno;
+                       goto err_out;
+               }
+
+               /*
+                * One bit per block, rounded up to whole unsigned longs (the
+                * element type of the bitmap) so that the trailing blocks of
+                * the packed file are always covered.  Plain "/ 8" rounded
+                * down and left the bitmap too short.
+                */
+               nblks = BLK_ROUND_UP(sbi, ei.i_size);
+               if (!nblks)
+                       nblks = 1;      /* never ask calloc() for 0 bytes */
+               epi->uptodate_size = round_up(nblks,
+                                             8 * sizeof(unsigned long)) / 8;
+               epi->uptodate = calloc(1, epi->uptodate_size);
+               if (!epi->uptodate) {
+                       err = -ENOMEM;
+                       goto err_out;
+               }
+       }
        return 0;
 
 err_out:
        erofs_packedfile_exit(sbi);
        return err;
 }
+
+/*
+ * Load the packed inode (sbi->packed_nid) into @pi unless it is already
+ * loaded.  A non-zero pi->nid marks @pi as loaded, so callers start with a
+ * zeroed nid and may call this repeatedly.
+ *
+ * Returns 0 on success or the error from erofs_read_inode_from_disk().
+ */
+static int erofs_load_packedinode_from_disk(struct erofs_inode *pi)
+{
+       struct erofs_sb_info *sbi = pi->sbi;
+       int err;
+
+       if (pi->nid)
+               return 0;
+
+       pi->nid = sbi->packed_nid;
+       err = erofs_read_inode_from_disk(pi);
+       if (err) {
+               erofs_err("failed to read packed inode from disk: %s",
+                         erofs_strerror(err));
+               /*
+                * Don't leave @pi marked as loaded: a later call must retry
+                * (and fail again) instead of reporting success for an inode
+                * that was never read.
+                */
+               pi->nid = 0;
+               return err;
+       }
+       return 0;
+}
+
+/*
+ * Read the extent of the packed inode which covers map->m_la, store it in
+ * the backing file at the same offset and mark its blocks in epi->uptodate.
+ *
+ * @pi:  packed inode; loaded from disk here if that has not happened yet
+ * @map: in: m_la is the offset to look up; out: m_la/m_llen describe the
+ *       range held by the returned buffer (m_llen is 0 on the err_out path)
+ *
+ * Returns a malloc()ed buffer with the data of [m_la, m_la + m_llen), or an
+ * ERR_PTR() on failure.
+ */
+static void *erofs_packedfile_preload(struct erofs_inode *pi,
+                                     struct erofs_map_blocks *map)
+{
+       struct erofs_sb_info *sbi = pi->sbi;
+       struct erofs_packed_inode *epi = sbi->packedinode;
+       unsigned int bsz = erofs_blksiz(sbi);
+       char *buffer;
+       erofs_off_t pos, end;
+       ssize_t err;
+
+       err = erofs_load_packedinode_from_disk(pi);
+       if (err)
+               return ERR_PTR(err);
+
+       /* remember the requested offset; the lookup below rewrites map->m_la */
+       pos = map->m_la;
+       err = erofs_map_blocks(pi, map, EROFS_GET_BLOCKS_FIEMAP);
+       if (err)
+               return ERR_PTR(err);
+
+       /*
+        * Widen the end of the range to a block boundary.  If the extent
+        * begins before the requested offset, move its start up to a block
+        * boundary too, so the range is block-aligned at both ends.
+        */
+       end = round_up(map->m_la + map->m_llen, bsz);
+       if (map->m_la < pos)
+               map->m_la = round_up(map->m_la, bsz);
+       else
+               DBG_BUGON(map->m_la > pos);
+
+       map->m_llen = end - map->m_la;
+       DBG_BUGON(!map->m_llen);
+       buffer = malloc(map->m_llen);
+       if (!buffer)
+               return ERR_PTR(-ENOMEM);
+
+       err = erofs_pread(pi, buffer, map->m_llen, map->m_la);
+       if (err)
+               goto err_out;
+
+       /* flush the stdio stream before writing through the raw descriptor */
+       fflush(epi->file);
+       err = pwrite(fileno(epi->file), buffer, map->m_llen, map->m_la);
+       if (err < 0) {
+               err = -errno;
+               if (err == -ENOSPC) {
+                       /*
+                        * Out of space: best-effort drop of everything kept
+                        * so far (truncate result deliberately ignored) and
+                        * forget all uptodate bits.
+                        */
+                       (void)ftruncate(fileno(epi->file), 0);
+                       memset(epi->uptodate, 0, epi->uptodate_size);
+               }
+               goto err_out;
+       }
+       /* a short write is reported as -EIO and leaves the bits unset */
+       if (err != map->m_llen) {
+               err = -EIO;
+               goto err_out;
+       }
+       /* mark every block of the written range as up to date */
+       for (pos = map->m_la; pos < end; pos += bsz)
+               __erofs_set_bit(erofs_blknr(sbi, pos), epi->uptodate);
+       return buffer;
+
+err_out:
+       free(buffer);
+       map->m_llen = 0;
+       return ERR_PTR(err);
+}
+
+/*
+ * Read @len bytes at offset @pos of the packed file into @buf.
+ *
+ * Without packed file state (sbi->packedinode is NULL) the data is read
+ * directly from the on-disk packed inode.  Otherwise the request is served
+ * chunk by chunk: a block whose bit is set in epi->uptodate is read from the
+ * backing file; a block that is not up to date is preloaded first and then
+ * copied out of the preload buffer.  If preloading fails or the backing file
+ * returns a short read, that chunk is read from the on-disk packed inode.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int erofs_packedfile_read(struct erofs_sb_info *sbi,
+                         void *buf, erofs_off_t len, erofs_off_t pos)
+{
+       struct erofs_packed_inode *epi = sbi->packedinode;
+       struct erofs_inode pi = {
+               .sbi = sbi,
+       };
+       struct erofs_map_blocks map = {
+               .index = UINT_MAX,
+       };
+       unsigned int bsz = erofs_blksiz(sbi);
+       erofs_off_t end = pos + len;
+       char *buffer = NULL;
+       int err;
+
+       if (!epi) {
+               err = erofs_load_packedinode_from_disk(&pi);
+               if (!err)
+                       err = erofs_pread(&pi, buf, len, pos);
+               return err;
+       }
+
+       err = 0;
+       while (pos < end) {
+               if (pos >= map.m_la && pos < map.m_la + map.m_llen) {
+                       /* inside the range held by the preload buffer */
+                       len = min_t(erofs_off_t, end - pos,
+                                   map.m_la + map.m_llen - pos);
+                       memcpy(buf, buffer + pos - map.m_la, len);
+               } else {
+                       erofs_blk_t bnr = erofs_blknr(sbi, pos);
+                       bool uptodate;
+
+                       /* handle at most the rest of the current block */
+                       map.m_la = round_down(pos, bsz);
+                       len = min_t(erofs_off_t, bsz - (pos & (bsz - 1)),
+                                   end - pos);
+                       uptodate = __erofs_test_bit(bnr, epi->uptodate);
+                       if (!uptodate) {
+#if EROFS_MT_ENABLED
+                               pthread_mutex_lock(&epi->mutex);
+                               /* test again now that the mutex is held */
+                               uptodate = __erofs_test_bit(bnr, epi->uptodate);
+                               if (!uptodate) {
+#endif
+                                       free(buffer);
+                                       buffer = erofs_packedfile_preload(&pi, &map);
+                                       if (IS_ERR(buffer)) {
+#if EROFS_MT_ENABLED
+                                               /* drop the mutex before leaving the locked region */
+                                               pthread_mutex_unlock(&epi->mutex);
+#endif
+                                               buffer = NULL;
+                                               goto fallback;
+                                       }
+
+#if EROFS_MT_ENABLED
+                               }
+                               pthread_mutex_unlock(&epi->mutex);
+#endif
+                       }
+
+                       /* freshly preloaded: loop again to copy from the buffer */
+                       if (!uptodate)
+                               continue;
+
+                       err = pread(fileno(epi->file), buf, len, pos);
+                       if (err < 0) {
+                               /* pread() returns -1; hand back the real cause */
+                               err = -errno;
+                               break;
+                       }
+                       if (err == len) {
+                               err = 0;
+                       } else {
+fallback:
+                               /* read this chunk from the on-disk packed inode */
+                               err = erofs_load_packedinode_from_disk(&pi);
+                               if (!err)
+                                       err = erofs_pread(&pi, buf, len, pos);
+                               if (err)
+                                       break;
+                       }
+                       map.m_llen = 0;
+               }
+               buf += len;
+               pos += len;
+       }
+       free(buffer);
+       return err;
+}
-- 
2.43.5

Reply via email to