Hi Friendy, I applied the version as below:
Thanks, Gao Xiang From 631ebfada7b6733ed31d70692f08a4e0bd3dc0b8 Mon Sep 17 00:00:00 2001 From: Friendy Su <friendy...@sony.com> Date: Sat, 23 Aug 2025 16:34:53 +0800 Subject: [PATCH v2 applied] erofs-utils: mkfs: Implement 'dsunit' alignment on blobdev Align inode data to huge pages on blobdev, where dsunit * blocksize = 2MiB. When a file is mmap()'ed with dax=always, aligning to huge pages allows the kernel to map a 2M huge page per page fault, instead of mapping a 4KiB normal page for each page fault. This greatly improves mmap() performance by reducing the number of page faults triggered. Note that `chunksize` should not be smaller than `dsunit` so that data alignment is preserved after deduplication. Signed-off-by: Friendy Su <friendy...@sony.com> Reviewed-by: Yuezhang Mo <yuezhang...@sony.com> Reviewed-by: Daniel Palmer <daniel.pal...@sony.com> [ Gao Xiang: refine some informational messages. ] Signed-off-by: Gao Xiang <hsiang...@linux.alibaba.com> --- lib/blobchunk.c | 19 +++++++++++++++++++ man/mkfs.erofs.1 | 13 +++++++++++++ mkfs/main.c | 15 +++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/lib/blobchunk.c b/lib/blobchunk.c index af6ddd7..4ed463f 100644 --- a/lib/blobchunk.c +++ b/lib/blobchunk.c @@ -309,6 +309,25 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd, minextblks = BLK_ROUND_UP(sbi, inode->i_size); interval_start = 0; + /* + * If dsunit <= chunksize, deduplication will not cause misalignment, + * so it's uncontroversial to apply the current data alignment policy. 
+ */ + if (sbi->bmgr->dsunit > 1 && + sbi->bmgr->dsunit <= 1u << (chunkbits - sbi->blkszbits)) { + off_t off = lseek(blobfile, 0, SEEK_CUR); + + off = roundup(off, sbi->bmgr->dsunit * erofs_blksiz(sbi)); + if (lseek(blobfile, off, SEEK_SET) != off) { + ret = -errno; + erofs_err("failed to lseek blobdev@0x%llx: %s", off, + erofs_strerror(ret)); + goto err; + } + erofs_dbg("Align /%s on block #%d (0x%llx)", + erofs_fspath(inode->i_srcpath), erofs_blknr(sbi, off), off); + } + for (pos = 0; pos < inode->i_size; pos += len) { #ifdef SEEK_DATA off_t offset = lseek(fd, pos + startoff, SEEK_DATA); diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1 index 63f7a2f..cc5a310 100644 --- a/man/mkfs.erofs.1 +++ b/man/mkfs.erofs.1 @@ -168,6 +168,19 @@ the output filesystem, with no leading /. .TP .BI "\-\-dsunit=" # Align all data block addresses to multiples of #. + +If \fI--dsunit\fR and \fI--chunksize\fR are both set, \fI--dsunit\fR will be +ignored if it is larger than \fI--chunksize\fR. + +If \fI--dsunit\fR is larger, it spans multiple chunks, for example: +\fI-b 4096\fR, \fI--dsunit 512\fR (2MiB), \fI--chunksize 4096\fR + +Once a chunk is deduplicated, all subsequent chunks will no longer be +aligned. For optimal performance, it is recommended to set \fI--dsunit\fR to +the same value as \fI--chunksize\fR: + +E.g. \fI-b\fR 4096, \fI--dsunit 512\fR (2MiB), \fI--chunksize $((4096*512))\fR + .TP .BI "\-\-exclude-path=" path Ignore file that matches the exact literal path. diff --git a/mkfs/main.c b/mkfs/main.c index e0ba55d..2e6de00 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -1298,6 +1298,21 @@ static int mkfs_parse_options_cfg(struct erofs_importer_params *params, return -EINVAL; } + /* + * chunksize must be greater than or equal to dsunit to keep + * data alignment working. + * + * If chunksize is smaller than dsunit (e.g., chunksize=4K, dsunit=2M), + * deduplicating a chunk will cause all subsequent data to become + * unaligned. 
Therefore, let's issue a warning here and still skip + * alignment for now. + */ + if (cfg.c_chunkbits && dsunit && + 1u << (cfg.c_chunkbits - g_sbi.blkszbits) < dsunit) { + erofs_warn("chunksize %u bytes is smaller than dsunit %u blocks, ignore dsunit !", + 1u << cfg.c_chunkbits, dsunit); + } + if (pclustersize_packed) { if (pclustersize_packed < (1U << mkfs_blkszbits) || pclustersize_packed % (1U << mkfs_blkszbits)) { -- 2.43.5