Hi Friendy,

I applied the version as below:

Thanks,
Gao Xiang

>From 631ebfada7b6733ed31d70692f08a4e0bd3dc0b8 Mon Sep 17 00:00:00 2001
From: Friendy Su <friendy...@sony.com>
Date: Sat, 23 Aug 2025 16:34:53 +0800
Subject: [PATCH v2 applied] erofs-utils: mkfs: Implement 'dsunit' alignment on blobdev

Align inode data to huge pages on blobdev, where dsunit * blocksize =
2MiB.

When a file is mmap()'ed with dax=always, aligning to huge pages allows
the kernel to map a 2M huge page per page fault, instead of mapping
a 4KiB normal page for each page fault.

This greatly improves mmap() performance by reducing the number of page
faults triggered.

Note that `chunksize` should not be smaller than `dsunit` so that
data alignment is preserved after deduplication.

Signed-off-by: Friendy Su <friendy...@sony.com>
Reviewed-by: Yuezhang Mo <yuezhang...@sony.com>
Reviewed-by: Daniel Palmer <daniel.pal...@sony.com>
[ Gao Xiang: refine some informational messages. ]
Signed-off-by: Gao Xiang <hsiang...@linux.alibaba.com>
---
 lib/blobchunk.c  | 19 +++++++++++++++++++
 man/mkfs.erofs.1 | 13 +++++++++++++
 mkfs/main.c      | 15 +++++++++++++++
 3 files changed, 47 insertions(+)

diff --git a/lib/blobchunk.c b/lib/blobchunk.c
index af6ddd7..4ed463f 100644
--- a/lib/blobchunk.c
+++ b/lib/blobchunk.c
@@ -309,6 +309,25 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd,
        minextblks = BLK_ROUND_UP(sbi, inode->i_size);
        interval_start = 0;
 
+       /*
+        * If dsunit <= chunksize, deduplication will not cause misalignment,
+        * so it's uncontroversial to apply the current data alignment policy.
+        */
+       if (sbi->bmgr->dsunit > 1 &&
+           sbi->bmgr->dsunit <= 1u << (chunkbits - sbi->blkszbits)) {
+               off_t off = lseek(blobfile, 0, SEEK_CUR);
+
+               off = roundup(off, sbi->bmgr->dsunit * erofs_blksiz(sbi));
+               if (lseek(blobfile, off, SEEK_SET) != off) {
+                       ret = -errno;
+                       erofs_err("failed to lseek blobdev@0x%llx: %s", off,
+                                 erofs_strerror(ret));
+                       goto err;
+               }
+               erofs_dbg("Align /%s on block #%d (0x%llx)",
+                         erofs_fspath(inode->i_srcpath), erofs_blknr(sbi, 
off), off);
+       }
+
        for (pos = 0; pos < inode->i_size; pos += len) {
 #ifdef SEEK_DATA
                off_t offset = lseek(fd, pos + startoff, SEEK_DATA);
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index 63f7a2f..cc5a310 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -168,6 +168,19 @@ the output filesystem, with no leading /.
 .TP
 .BI "\-\-dsunit=" #
 Align all data block addresses to multiples of #.
+
+If \fI--dsunit\fR and \fI--chunksize\fR are both set, \fI--dsunit\fR will be
+ignored if it is larger than \fI--chunksize\fR.
+
+If \fI--dsunit\fR is larger, a single dsunit spans multiple chunks, for example:
+\fI-b 4096\fR, \fI--dsunit 512\fR (2MiB), \fI--chunksize 4096\fR
+
+In that case, once a chunk is deduplicated, all subsequent chunks will no
+longer be aligned. For optimal performance, it is recommended to make
+\fI--dsunit\fR (in blocks) cover the same size as \fI--chunksize\fR (in bytes):
+
+E.g. \fI-b 4096\fR, \fI--dsunit 512\fR (2MiB), \fI--chunksize $((4096*512))\fR
+
 .TP
 .BI "\-\-exclude-path=" path
 Ignore file that matches the exact literal path.
diff --git a/mkfs/main.c b/mkfs/main.c
index e0ba55d..2e6de00 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -1298,6 +1298,21 @@ static int mkfs_parse_options_cfg(struct erofs_importer_params *params,
                return -EINVAL;
        }
 
+       /*
+        * chunksize must be greater than or equal to dsunit to keep
+        * data alignment working.
+        *
+        * If chunksize is smaller than dsunit (e.g., chunksize=4K, dsunit=2M),
+        * deduplicating a chunk will cause all subsequent data to become
+        * unaligned. Therefore, let's issue a warning here and still skip
+        * alignment for now.
+        */
+       if (cfg.c_chunkbits && dsunit &&
+           1u << (cfg.c_chunkbits - g_sbi.blkszbits) < dsunit) {
+               erofs_warn("chunksize %u bytes is smaller than dsunit %u 
blocks, ignore dsunit !",
+                          1u << cfg.c_chunkbits, dsunit);
+       }
+
        if (pclustersize_packed) {
                if (pclustersize_packed < (1U << mkfs_blkszbits) ||
                    pclustersize_packed % (1U << mkfs_blkszbits)) {
-- 
2.43.5


Reply via email to