Add an extended option `-E48bit` to indicate that 48-bit block addressing is used. In that case, 32-byte inodes are preferred, since they have per-inode timestamps too.
Signed-off-by: Gao Xiang <hsiang...@linux.alibaba.com> --- v2: - fix chunk-based inode support. lib/blobchunk.c | 27 ++++++++------- lib/inode.c | 88 ++++++++++++++++++++++++++++++++++++++++++------- mkfs/main.c | 27 ++++++++++++--- 3 files changed, 116 insertions(+), 26 deletions(-) diff --git a/lib/blobchunk.c b/lib/blobchunk.c index 799bdd2..add0f33 100644 --- a/lib/blobchunk.c +++ b/lib/blobchunk.c @@ -141,11 +141,11 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, struct erofs_sb_info *sbi = inode->sbi; erofs_blk_t remaining_blks = BLK_ROUND_UP(sbi, inode->i_size); struct erofs_inode_chunk_index idx = {0}; + erofs_blk_t extent_end = EROFS_NULL_ADDR, chunkblks; erofs_blk_t extent_start = EROFS_NULL_ADDR; - erofs_blk_t extent_end, chunkblks; erofs_off_t source_offset; unsigned int dst, src, unit, zeroedlen; - bool first_extent = true; + bool _48bit, first_extent = true; if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES) unit = sizeof(struct erofs_inode_chunk_index); @@ -153,24 +153,25 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, unit = EROFS_BLOCK_MAP_ENTRY_SIZE; chunkblks = 1U << (inode->u.chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK); + _48bit = inode->u.chunkformat & EROFS_CHUNK_FORMAT_48BIT; for (dst = src = 0; dst < inode->extent_isize; src += sizeof(void *), dst += unit) { struct erofs_blobchunk *chunk; + erofs_blk_t startblk; chunk = *(void **)(inode->chunkindexes + src); if (chunk->blkaddr == EROFS_NULL_ADDR) { - idx.startblk_lo = EROFS_NULL_ADDR; + startblk = EROFS_NULL_ADDR; } else if (chunk->device_id) { DBG_BUGON(!(inode->u.chunkformat & EROFS_CHUNK_FORMAT_INDEXES)); - idx.startblk_lo = chunk->blkaddr; + startblk = chunk->blkaddr; extent_start = EROFS_NULL_ADDR; } else { - idx.startblk_lo = remapped_base + chunk->blkaddr; + startblk = remapped_base + chunk->blkaddr; } - if (extent_start == EROFS_NULL_ADDR || - idx.startblk_lo != extent_end) { + if (extent_start == EROFS_NULL_ADDR || startblk != extent_end) { 
if (extent_start != EROFS_NULL_ADDR) { remaining_blks -= extent_end - extent_start; tarerofs_blocklist_write(extent_start, @@ -182,12 +183,14 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, first_extent, false); first_extent = false; } - extent_start = idx.startblk_lo; + extent_start = startblk; source_offset = chunk->sourceoffset; } - extent_end = idx.startblk_lo + chunkblks; + extent_end = startblk + chunkblks; idx.device_id = cpu_to_le16(chunk->device_id); - idx.startblk_lo = cpu_to_le32(idx.startblk_lo); + idx.startblk_lo = cpu_to_le32(startblk); + idx.startblk_hi = cpu_to_le32(startblk >> 32); + DBG_BUGON(!_48bit && idx.startblk_hi); if (unit == EROFS_BLOCK_MAP_ENTRY_SIZE) memcpy(inode->chunkindexes + dst, &idx.startblk_lo, unit); @@ -195,8 +198,8 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, memcpy(inode->chunkindexes + dst, &idx, sizeof(idx)); } off = roundup(off, unit); - extent_end = min(extent_end, extent_start + remaining_blks); if (extent_start != EROFS_NULL_ADDR) { + extent_end = min(extent_end, extent_start + remaining_blks); zeroedlen = inode->i_size & (erofs_blksiz(sbi) - 1); if (zeroedlen) zeroedlen = erofs_blksiz(sbi) - zeroedlen; @@ -368,6 +371,8 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd, goto err; } + if (chunk->blkaddr >= UINT32_MAX) + inode->u.chunkformat |= EROFS_CHUNK_FORMAT_48BIT; if (!erofs_blob_can_merge(sbi, lastch, chunk)) { erofs_update_minextblks(sbi, interval_start, pos, &minextblks); diff --git a/lib/inode.c b/lib/inode.c index e63fb9f..cf3bcee 100644 --- a/lib/inode.c +++ b/lib/inode.c @@ -525,12 +525,29 @@ static bool erofs_file_is_compressible(struct erofs_inode *inode) return true; } -static int write_uncompressed_file_from_fd(struct erofs_inode *inode, int fd) +static int write_uncompressed_file_from_fd(struct erofs_inode *inode, int fd, + erofs_off_t fpos) { struct erofs_sb_info *sbi = inode->sbi; erofs_blk_t nblocks, i; unsigned int len; int ret; + bool noseek = 
inode->datasource == EROFS_INODE_DATA_SOURCE_DISKBUF; + + if (!noseek && erofs_sb_has_48bit(sbi)) { + if (lseek(fd, fpos, SEEK_DATA) < 0 && errno == ENXIO) { + ret = erofs_allocate_inode_bh_data(inode, 0); + if (ret) + return ret; + inode->datalayout = EROFS_INODE_FLAT_PLAIN; + return 0; + } + ret = lseek(fd, fpos, SEEK_SET); + if (ret < 0) + return ret; + else if (ret != fpos) + return -EIO; + } inode->datalayout = EROFS_INODE_FLAT_INLINE; nblocks = inode->i_size >> sbi->blkszbits; @@ -545,7 +562,7 @@ static int write_uncompressed_file_from_fd(struct erofs_inode *inode, int fd) ret = erofs_io_xcopy(&sbi->bdev, erofs_pos(sbi, inode->u.i_blkaddr + i), &((struct erofs_vfile){ .fd = fd }), len, - inode->datasource == EROFS_INODE_DATA_SOURCE_DISKBUF); + noseek); if (ret) return ret; } @@ -580,7 +597,7 @@ int erofs_write_unencoded_file(struct erofs_inode *inode, int fd, u64 fpos) } /* fallback to all data uncompressed */ - return write_uncompressed_file_from_fd(inode, fd); + return write_uncompressed_file_from_fd(inode, fd, fpos); } int erofs_iflush(struct erofs_inode *inode) @@ -593,7 +610,9 @@ int erofs_iflush(struct erofs_inode *inode) struct erofs_inode_extended die; } u = {}; union erofs_inode_i_u u1; - int ret; + union erofs_inode_i_nb nb; + bool nlink_1 = true; + int ret, fmt; if (inode->bh) off = erofs_btell(inode->bh, false); @@ -610,9 +629,22 @@ int erofs_iflush(struct erofs_inode *inode) else u1.startblk_lo = cpu_to_le32(inode->u.i_blkaddr); + if (is_inode_layout_compression(inode) && + inode->u.i_blocks > UINT32_MAX) { + nb.blocks_hi = cpu_to_le16(inode->u.i_blocks >> 32); + } else if (inode->datalayout != EROFS_INODE_CHUNK_BASED && + inode->u.i_blkaddr > UINT32_MAX) { + if (inode->u.i_blkaddr == EROFS_NULL_ADDR) + nlink_1 = false; + nb.startblk_hi = cpu_to_le16(inode->u.i_blkaddr >> 32); + } else { + nlink_1 = false; + nb = (union erofs_inode_i_nb){}; + } + switch (inode->inode_isize) { case sizeof(struct erofs_inode_compact): - u.dic.i_format = 
cpu_to_le16(0 | (inode->datalayout << 1)); + fmt = 0 | (inode->datalayout << 1); u.dic.i_xattr_icount = cpu_to_le16(icount); u.dic.i_mode = cpu_to_le16(inode->i_mode); u.dic.i_nb.nlink = cpu_to_le16(inode->i_nlink); @@ -622,7 +654,19 @@ int erofs_iflush(struct erofs_inode *inode) u.dic.i_uid = cpu_to_le16((u16)inode->i_uid); u.dic.i_gid = cpu_to_le16((u16)inode->i_gid); + if (!cfg.c_ignore_mtime) + u.dic.i_mtime = cpu_to_le64(inode->i_mtime - sbi->epoch); u.dic.i_u = u1; + + if (nlink_1) { + if (inode->i_nlink != 1) + return -EFSCORRUPTED; + u.dic.i_nb = nb; + fmt |= 1 << EROFS_I_NLINK_1_BIT; + } else { + u.dic.i_nb.nlink = cpu_to_le16(inode->i_nlink); + } + u.dic.i_format = cpu_to_le16(fmt); break; case sizeof(struct erofs_inode_extended): u.die.i_format = cpu_to_le16(1 | (inode->datalayout << 1)); @@ -639,6 +683,7 @@ int erofs_iflush(struct erofs_inode *inode) u.die.i_mtime = cpu_to_le64(inode->i_mtime); u.die.i_mtime_nsec = cpu_to_le32(inode->i_mtime_nsec); u.die.i_u = u1; + u.die.i_nb = nb; break; default: erofs_err("unsupported on-disk inode version of nid %llu", @@ -725,6 +770,19 @@ static int erofs_prepare_tail_block(struct erofs_inode *inode) return 0; } +static bool erofs_inode_need_48bit(struct erofs_inode *inode) +{ + if (inode->datalayout == EROFS_INODE_CHUNK_BASED) { + if (inode->u.chunkformat & EROFS_CHUNK_FORMAT_48BIT) + return true; + } else if (!is_inode_layout_compression(inode)) { + if (inode->u.i_blkaddr != EROFS_NULL_ADDR && + inode->u.i_blkaddr > UINT32_MAX) + return true; + } + return false; +} + static int erofs_prepare_inode_buffer(struct erofs_inode *inode) { struct erofs_bufmgr *bmgr = inode->sbi->bmgr; @@ -733,6 +791,14 @@ static int erofs_prepare_inode_buffer(struct erofs_inode *inode) DBG_BUGON(inode->bh || inode->bh_inline); + if (erofs_inode_need_48bit(inode)) { + if (!erofs_sb_has_48bit(inode->sbi)) + return -ENOSPC; + if (inode->inode_isize == sizeof(struct erofs_inode_compact) && + inode->i_nlink != 1) + inode->inode_isize = + 
sizeof(struct erofs_inode_extended); + } inodesize = inode->inode_isize + inode->xattr_isize; if (inode->extent_isize) inodesize = roundup(inodesize, 8) + inode->extent_isize; @@ -929,9 +995,9 @@ static bool erofs_should_use_inode_extended(struct erofs_inode *inode) return true; if (inode->i_nlink > USHRT_MAX) return true; - if ((inode->i_mtime != inode->sbi->build_time || - inode->i_mtime_nsec != inode->sbi->fixed_nsec) && - !cfg.c_ignore_mtime) + if (!cfg.c_ignore_mtime && + !erofs_sb_has_48bit(inode->sbi) && + inode->i_mtime != inode->sbi->epoch) return true; return false; } @@ -1029,7 +1095,7 @@ int __erofs_fill_inode(struct erofs_inode *inode, struct stat *st, if (inode->i_mtime < sbi->build_time) break; case TIMESTAMP_FIXED: - inode->i_mtime = sbi->build_time; + inode->i_mtime = sbi->epoch + sbi->build_time; inode->i_mtime_nsec = sbi->fixed_nsec; default: break; @@ -1925,7 +1991,7 @@ struct erofs_inode *erofs_mkfs_build_special_from_fd(struct erofs_sb_info *sbi, if (ret < 0) return ERR_PTR(-errno); } - ret = write_uncompressed_file_from_fd(inode, fd); + ret = write_uncompressed_file_from_fd(inode, fd, 0); if (ret) return ERR_PTR(ret); out: @@ -1997,7 +2063,7 @@ struct erofs_inode *erofs_rebuild_make_root(struct erofs_sb_info *sbi) root->i_srcpath = strdup("/"); root->i_mode = S_IFDIR | 0777; root->i_parent = root; - root->i_mtime = root->sbi->build_time; + root->i_mtime = root->sbi->epoch + root->sbi->build_time; root->i_mtime_nsec = root->sbi->fixed_nsec; erofs_init_empty_dir(root); return root; diff --git a/mkfs/main.c b/mkfs/main.c index d604c77..2defa92 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -322,6 +322,18 @@ static int erofs_mkfs_feat_set_fragdedupe(bool en, const char *val, return 0; } +static int erofs_mkfs_feat_set_48bit(bool en, const char *val, + unsigned int vallen) +{ + if (vallen) + return -EINVAL; + if (en) + erofs_sb_set_48bit(&g_sbi); + else + erofs_sb_clear_48bit(&g_sbi); + return 0; +} + static struct { char *feat; int (*set)(bool en, 
const char *val, unsigned int len); @@ -332,6 +344,7 @@ static struct { {"all-fragments", erofs_mkfs_feat_set_all_fragments}, {"dedupe", erofs_mkfs_feat_set_dedupe}, {"fragdedupe", erofs_mkfs_feat_set_fragdedupe}, + {"48bit", erofs_mkfs_feat_set_48bit}, {NULL, NULL}, }; @@ -1201,6 +1214,7 @@ int main(int argc, char **argv) erofs_blk_t nblocks = 0; struct timeval t; FILE *blklst = NULL; + s64 mkfs_time = 0; u32 crc; erofs_init_configure(); @@ -1220,12 +1234,17 @@ int main(int argc, char **argv) return 1; } + g_sbi.fixed_nsec = 0; if (cfg.c_unix_timestamp != -1) { - g_sbi.build_time = cfg.c_unix_timestamp; - g_sbi.fixed_nsec = 0; + mkfs_time = cfg.c_unix_timestamp; } else if (!gettimeofday(&t, NULL)) { - g_sbi.build_time = t.tv_sec; - g_sbi.fixed_nsec = t.tv_usec; + mkfs_time = t.tv_sec; + } + if (erofs_sb_has_48bit(&g_sbi)) { + g_sbi.epoch = max_t(s64, 0, mkfs_time - UINT32_MAX); + g_sbi.build_time = mkfs_time - g_sbi.epoch; + } else { + g_sbi.epoch = mkfs_time; } err = erofs_dev_open(&g_sbi, cfg.c_img_path, O_RDWR | -- 2.43.5