Use encoded extents if 48bit is set and metadata is smaller for big pclusters.
For Zstd, since it doesn't natively support fixed-sized output compression, switch to use fixed-sized input compression if `--max-extent-bytes=` is specified and no more than `-C`. Later we might introduce a simplified option for users too. Signed-off-by: Gao Xiang <hsiang...@linux.alibaba.com> --- include/erofs/internal.h | 1 + include/erofs_fs.h | 3 +- lib/compress.c | 257 ++++++++++++++++++++++++++++++--------- lib/compressor.c | 11 ++ lib/compressor.h | 6 + lib/compressor_libzstd.c | 17 +++ 6 files changed, 235 insertions(+), 60 deletions(-) diff --git a/include/erofs/internal.h b/include/erofs/internal.h index 227e830..7a21044 100644 --- a/include/erofs/internal.h +++ b/include/erofs/internal.h @@ -269,6 +269,7 @@ struct erofs_inode { unsigned int z_idataoff; erofs_off_t fragmentoff; }; + unsigned int z_extents; #define z_idata_size idata_size }; }; diff --git a/include/erofs_fs.h b/include/erofs_fs.h index ce319d7..77af967 100644 --- a/include/erofs_fs.h +++ b/include/erofs_fs.h @@ -413,8 +413,9 @@ struct z_erofs_lcluster_index { } di_u; }; +#define Z_EROFS_MAP_HEADER_START(end) round_up(end, 8) #define Z_EROFS_MAP_HEADER_END(end) \ - (round_up(end, 8) + sizeof(struct z_erofs_map_header)) + (Z_EROFS_MAP_HEADER_START(end) + sizeof(struct z_erofs_map_header)) #define Z_EROFS_FULL_INDEX_START(end) (Z_EROFS_MAP_HEADER_END(end) + 8) #define Z_EROFS_EXTENT_PLEN_PARTIAL BIT(27) diff --git a/lib/compress.c b/lib/compress.c index 98288d4..0a8f893 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -49,6 +49,8 @@ struct z_erofs_compress_ictx { /* inode context */ u32 tof_chksum; bool fix_dedupedfrag; bool fragemitted; + bool dedupe; + bool data_unaligned; /* fields for write indexes */ u8 *metacur; @@ -78,13 +80,12 @@ struct z_erofs_compress_sctx { /* segment context */ unsigned int head, tail; unsigned int pclustersize; - erofs_off_t pstart; + erofs_off_t pstart, poff; u16 clusterofs; int seg_idx; void *membuf; - erofs_off_t memoff; }; #ifdef EROFS_MT_ENABLED @@ 
-336,10 +337,7 @@ static int z_erofs_compress_dedupe(struct z_erofs_compress_sctx *ctx) ei->e.partial = true; ei->e.length -= delta; } - - /* fall back to noncompact indexes for deduplication */ - inode->z_advise &= ~Z_EROFS_ADVISE_COMPACTED_2B; - inode->datalayout = EROFS_INODE_COMPRESSED_FULL; + ctx->ictx->dedupe = true; erofs_sb_set_dedupe(sbi); sbi->saved_by_deduplication += dctx.e.plen; @@ -389,8 +387,7 @@ static int write_uncompressed_block(struct z_erofs_compress_sctx *ctx, if (ctx->membuf) { erofs_dbg("Writing %u uncompressed data of %s", count, inode->i_srcpath); - memcpy(ctx->membuf + ctx->memoff, dst, erofs_blksiz(sbi)); - ctx->memoff += erofs_blksiz(sbi); + memcpy(ctx->membuf + ctx->poff, dst, erofs_blksiz(sbi)); } else { erofs_dbg("Writing %u uncompressed data to %llu", count, ctx->pstart | 0ULL); @@ -398,6 +395,7 @@ static int write_uncompressed_block(struct z_erofs_compress_sctx *ctx, if (ret) return ret; } + ctx->poff += erofs_blksiz(sbi); return count; } @@ -555,7 +553,9 @@ static int __z_erofs_compress_one(struct z_erofs_compress_sctx *ctx, bool is_packed_inode = erofs_is_packed_inode(inode); bool tsg = (ctx->seg_idx + 1 >= ictx->seg_num), final = !ctx->remaining; bool may_packing = (cfg.c_fragments && tsg && final && !is_packed_inode); - bool may_inline = (cfg.c_ztailpacking && tsg && final && !may_packing); + bool data_unaligned = ictx->data_unaligned; + bool may_inline = (cfg.c_ztailpacking && !data_unaligned && tsg && + final && !may_packing); unsigned int compressedsize; int ret; @@ -579,21 +579,32 @@ static int __z_erofs_compress_one(struct z_erofs_compress_sctx *ctx, } e->length = min(len, cfg.c_max_decompressed_extent_bytes); - ret = erofs_compress_destsize(h, ctx->queue + ctx->head, - &e->length, dst, ctx->pclustersize); - if (ret <= 0) { + if (data_unaligned) { + ret = erofs_compress(h, ctx->queue + ctx->head, e->length, + dst, ctx->pclustersize); + if (ret == -EOPNOTSUPP) { + data_unaligned = false; + goto retry_aligned; + } + } else { 
+retry_aligned: + ret = erofs_compress_destsize(h, ctx->queue + ctx->head, + &e->length, dst, ctx->pclustersize); + } + + if (ret > 0) { + compressedsize = ret; + /* even compressed size is smaller, there is no real gain */ + if (!data_unaligned && !(may_inline && e->length == len && ret < blksz)) + ret = roundup(ret, blksz); + } else if (ret != -ENOSPC) { erofs_err("failed to compress %s: %s", inode->i_srcpath, erofs_strerror(ret)); return ret; } - compressedsize = ret; - /* even compressed size is smaller, there is no real gain */ - if (!(may_inline && e->length == len && ret < blksz)) - ret = roundup(ret, blksz); - /* check if there is enough gain to keep the compressed data */ - if (ret * h->compress_threshold / 100 >= e->length) { + if (ret < 0 || ret * h->compress_threshold / 100 >= e->length) { if (may_inline && len < blksz) { ret = z_erofs_fill_inline_data(inode, ctx->queue + ctx->head, len, true); @@ -652,7 +663,7 @@ frag_packing: e->plen = blksz; e->raw = false; } else { - unsigned int tailused, padding; + unsigned int padding; /* * If there's space left for the last round when deduping @@ -660,7 +671,7 @@ frag_packing: * more to check whether it can be filled up. Fix the fragment * if succeeds. Otherwise, just drop it and go on packing. 
*/ - if (may_packing && len == e->length && + if (!data_unaligned && may_packing && len == e->length && (compressedsize & (blksz - 1)) && ctx->tail < Z_EROFS_COMPR_QUEUE_SZ) { ctx->pclustersize = roundup(compressedsize, blksz); @@ -676,13 +687,12 @@ frag_packing: return ret; } - e->plen = round_up(compressedsize, blksz); + if (data_unaligned) + e->plen = compressedsize; + else + e->plen = round_up(compressedsize, blksz); DBG_BUGON(e->plen >= e->length); - - padding = 0; - tailused = compressedsize & (blksz - 1); - if (tailused) - padding = blksz - tailused; + padding = e->plen - compressedsize; /* zero out garbage trailing data for non-0padding */ if (!erofs_sb_has_lz4_0padding(sbi)) { @@ -695,9 +705,7 @@ frag_packing: erofs_dbg("Writing %u compressed data of %u bytes of %s", e->length, e->plen, inode->i_srcpath); - memcpy(ctx->membuf + ctx->memoff, - dst - padding, e->plen); - ctx->memoff += e->plen; + memcpy(ctx->membuf + ctx->poff, dst - padding, e->plen); } else { erofs_dbg("Writing %u compressed data to %llu of %u bytes", e->length, ctx->pstart, e->plen); @@ -707,6 +715,7 @@ frag_packing: if (ret) return ret; } + ctx->poff += e->plen; e->raw = false; may_inline = false; may_packing = false; @@ -979,30 +988,171 @@ static void z_erofs_write_mapheader(struct erofs_inode *inode, void *compressmeta) { struct erofs_sb_info *sbi = inode->sbi; - struct z_erofs_map_header h = { - .h_advise = cpu_to_le16(inode->z_advise), - .h_algorithmtype = inode->z_algorithmtype[1] << 4 | - inode->z_algorithmtype[0], - /* lclustersize */ - .h_clusterbits = inode->z_logical_clusterbits - sbi->blkszbits, - }; + struct z_erofs_map_header h; - if (inode->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) - h.h_fragmentoff = cpu_to_le32(inode->fragmentoff); - else - h.h_idata_size = cpu_to_le16(inode->idata_size); + if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL && + (inode->z_advise & Z_EROFS_ADVISE_EXTENTS)) { + int recsz = z_erofs_extent_recsize(inode->z_advise); + + if (recsz > 
offsetof(struct z_erofs_extent, pstart_hi)) { + h = (struct z_erofs_map_header) { + .h_advise = cpu_to_le16(inode->z_advise), + .h_extents_lo = cpu_to_le32(inode->z_extents), + }; + } else { + DBG_BUGON(inode->z_logical_clusterbits < sbi->blkszbits); + h = (struct z_erofs_map_header) { + .h_advise = cpu_to_le16(inode->z_advise), + .h_clusterbits = inode->z_logical_clusterbits - sbi->blkszbits, + }; + } + } else { + h = (struct z_erofs_map_header) { + .h_advise = cpu_to_le16(inode->z_advise), + .h_algorithmtype = inode->z_algorithmtype[1] << 4 | + inode->z_algorithmtype[0], + /* lclustersize */ + .h_clusterbits = inode->z_logical_clusterbits - sbi->blkszbits, + }; + if (inode->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) + h.h_fragmentoff = cpu_to_le32(inode->fragmentoff); + else + h.h_idata_size = cpu_to_le16(inode->idata_size); - memset(compressmeta, 0, Z_EROFS_LEGACY_MAP_HEADER_SIZE); + memset(compressmeta, 0, Z_EROFS_LEGACY_MAP_HEADER_SIZE); + } /* write out map header */ memcpy(compressmeta, &h, sizeof(struct z_erofs_map_header)); } +#define EROFS_FULL_INDEXES_SZ(inode) \ + (BLK_ROUND_UP(inode->sbi, inode->i_size) * \ + sizeof(struct z_erofs_lcluster_index) + Z_EROFS_LEGACY_MAP_HEADER_SIZE) + +static void *z_erofs_write_extents(struct z_erofs_compress_ictx *ctx) +{ + struct erofs_inode *inode = ctx->inode; + struct erofs_sb_info *sbi = inode->sbi; + struct z_erofs_extent_item *ei, *n; + unsigned int lclusterbits, nexts; + bool pstart_hi = false, unaligned_data = false; + erofs_off_t pstart, pend, lstart; + unsigned int recsz, metasz, moff; + void *metabuf; + + ei = list_first_entry(&ctx->extents, struct z_erofs_extent_item, + list); + lclusterbits = max_t(u8, ilog2(ei->e.length - 1) + 1, sbi->blkszbits); + pend = pstart = ei->e.pstart; + nexts = 0; + list_for_each_entry(ei, &ctx->extents, list) { + pstart_hi |= (ei->e.pstart > UINT32_MAX); + if ((ei->e.pstart | ei->e.plen) & ((1U << sbi->blkszbits) - 1)) + unaligned_data = true; + if (pend != ei->e.pstart) + 
pend = EROFS_NULL_ADDR; + else + pend += ei->e.plen; + if (ei->e.length != 1 << lclusterbits) { + if (ei->list.next != &ctx->extents || + ei->e.length > 1 << lclusterbits) + lclusterbits = 0; + } + ++nexts; + } + + recsz = inode->i_size > UINT32_MAX ? 32 : 16; + if (lclusterbits) { + if (pend != EROFS_NULL_ADDR) + recsz = 4; + else if (recsz <= 16 && !pstart_hi) + recsz = 8; + } + + moff = Z_EROFS_MAP_HEADER_END(inode->inode_isize + inode->xattr_isize); + moff = round_up(moff, recsz) - + Z_EROFS_MAP_HEADER_START(inode->inode_isize + inode->xattr_isize); + metasz = moff + recsz * nexts + 8 * (recsz <= 4); + if (!unaligned_data && metasz > EROFS_FULL_INDEXES_SZ(inode)) + return ERR_PTR(-EAGAIN); + + metabuf = malloc(metasz); + if (!metabuf) + return ERR_PTR(-ENOMEM); + inode->z_logical_clusterbits = lclusterbits; + inode->z_extents = nexts; + ctx->metacur = metabuf + moff; + if (recsz <= 4) { + *(__le64 *)ctx->metacur = cpu_to_le64(pstart); + ctx->metacur += sizeof(__le64); + } + + nexts = 0; + lstart = 0; + list_for_each_entry_safe(ei, n, &ctx->extents, list) { + struct z_erofs_extent de; + u32 fmt, plen; + + plen = ei->e.plen; + if (!plen) { + plen = inode->fragmentoff; + ei->e.pstart = inode->fragmentoff >> 32; + } else { + fmt = ei->e.raw ? 
0 : inode->z_algorithmtype[0] + 1; + plen |= fmt << Z_EROFS_EXTENT_PLEN_FMT_BIT; + if (ei->e.partial) + plen |= Z_EROFS_EXTENT_PLEN_PARTIAL; + } + de = (struct z_erofs_extent) { + .plen = cpu_to_le32(plen), + .pstart_lo = cpu_to_le32(ei->e.pstart), + .lstart_lo = cpu_to_le32(lstart), + .pstart_hi = cpu_to_le32(ei->e.pstart >> 32), + .lstart_hi = cpu_to_le32(lstart >> 32), + }; + memcpy(ctx->metacur, &de, recsz); + ctx->metacur += recsz; + lstart += ei->e.length; + list_del(&ei->list); + free(ei); + } + inode->datalayout = EROFS_INODE_COMPRESSED_FULL; + inode->z_advise |= Z_EROFS_ADVISE_EXTENTS | + ((ilog2(recsz) - 2) << Z_EROFS_ADVISE_EXTRECSZ_BIT); + return metabuf; +} + static void *z_erofs_write_indexes(struct z_erofs_compress_ictx *ctx) { struct erofs_inode *inode = ctx->inode; + struct erofs_sb_info *sbi = inode->sbi; struct z_erofs_extent_item *ei, *n; void *metabuf; + if (erofs_sb_has_48bit(sbi)) { + metabuf = z_erofs_write_extents(ctx); + if (metabuf != ERR_PTR(-EAGAIN)) { + if (IS_ERR(metabuf)) + return metabuf; + goto out; + } + } + + if (!cfg.c_legacy_compress && !ctx->dedupe && + inode->z_logical_clusterbits <= 14) { + if (inode->z_logical_clusterbits <= 12) + inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B; + inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT; + } else { + inode->datalayout = EROFS_INODE_COMPRESSED_FULL; + } + + if (erofs_sb_has_big_pcluster(sbi)) { + inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1; + if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT) + inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2; + } + metabuf = malloc(BLK_ROUND_UP(inode->sbi, inode->i_size) * sizeof(struct z_erofs_lcluster_index) + Z_EROFS_LEGACY_MAP_HEADER_SIZE); @@ -1018,6 +1168,7 @@ static void *z_erofs_write_indexes(struct z_erofs_compress_ictx *ctx) free(ei); } z_erofs_fini_full_indexes(ctx); +out: z_erofs_write_mapheader(inode, metabuf); return metabuf; } @@ -1075,6 +1226,7 @@ int z_erofs_compress_segment(struct z_erofs_compress_sctx *ctx, int fd = 
ictx->fd; ctx->pstart = pstart; + ctx->poff = 0; while (ctx->remaining) { const u64 rx = min_t(u64, ctx->remaining, Z_EROFS_COMPR_QUEUE_SZ - ctx->tail); @@ -1310,8 +1462,6 @@ void z_erofs_mt_workfn(struct erofs_work *work, void *tlsp) ret = -ENOMEM; goto out; } - sctx->memoff = 0; - ret = z_erofs_compress_segment(sctx, sctx->seg_idx * cfg.c_mkfs_segment_size, EROFS_NULL_ADDR); @@ -1480,22 +1630,6 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos) /* initialize per-file compression setting */ inode->z_advise = 0; inode->z_logical_clusterbits = sbi->blkszbits; - if (!cfg.c_legacy_compress && inode->z_logical_clusterbits <= 14) { - if (inode->z_logical_clusterbits <= 12) - inode->z_advise |= Z_EROFS_ADVISE_COMPACTED_2B; - inode->datalayout = EROFS_INODE_COMPRESSED_COMPACT; - } else { - inode->datalayout = EROFS_INODE_COMPRESSED_FULL; - } - - if (erofs_sb_has_big_pcluster(sbi)) { - inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_1; - if (inode->datalayout == EROFS_INODE_COMPRESSED_COMPACT) - inode->z_advise |= Z_EROFS_ADVISE_BIG_PCLUSTER_2; - } - if (cfg.c_fragments && !cfg.c_dedupe) - inode->z_advise |= Z_EROFS_ADVISE_INTERLACED_PCLUSTER; - #ifndef NDEBUG if (cfg.c_random_algorithms) { while (1) { @@ -1530,6 +1664,11 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos) ictx->ccfg = &erofs_ccfg[inode->z_algorithmtype[0]]; inode->z_algorithmtype[0] = ictx->ccfg->algorithmtype; inode->z_algorithmtype[1] = 0; + ictx->data_unaligned = erofs_sb_has_48bit(sbi) && + cfg.c_max_decompressed_extent_bytes <= + z_erofs_get_max_pclustersize(inode); + if (cfg.c_fragments && !cfg.c_dedupe && !ictx->data_unaligned) + inode->z_advise |= Z_EROFS_ADVISE_INTERLACED_PCLUSTER; /* * Handle tails in advance to avoid writing duplicated diff --git a/lib/compressor.c b/lib/compressor.c index 41f49ff..6d8c1c2 100644 --- a/lib/compressor.c +++ b/lib/compressor.c @@ -85,6 +85,17 @@ int erofs_compress_destsize(const struct erofs_compress *c, 
return c->alg->c->compress_destsize(c, src, srcsize, dst, dstsize); } +int erofs_compress(const struct erofs_compress *c, + const void *src, unsigned int srcsize, + void *dst, unsigned int dstcapacity) +{ + DBG_BUGON(!c->alg); + if (!c->alg->c->compress) + return -EOPNOTSUPP; + + return c->alg->c->compress(c, src, srcsize, dst, dstcapacity); +} + int erofs_compressor_init(struct erofs_sb_info *sbi, struct erofs_compress *c, char *alg_name, int compression_level, u32 dict_size) { diff --git a/lib/compressor.h b/lib/compressor.h index 8d322d5..ea2d03d 100644 --- a/lib/compressor.h +++ b/lib/compressor.h @@ -26,6 +26,9 @@ struct erofs_compressor { int (*compress_destsize)(const struct erofs_compress *c, const void *src, unsigned int *srcsize, void *dst, unsigned int dstsize); + int (*compress)(const struct erofs_compress *c, + const void *src, unsigned int srcsize, + void *dst, unsigned dstcapacity); }; struct erofs_algorithm { @@ -60,6 +63,9 @@ int z_erofs_get_compress_algorithm_id(const struct erofs_compress *c); int erofs_compress_destsize(const struct erofs_compress *c, const void *src, unsigned int *srcsize, void *dst, unsigned int dstsize); +int erofs_compress(const struct erofs_compress *c, + const void *src, unsigned int srcsize, + void *dst, unsigned int dstcapacity); int erofs_compressor_init(struct erofs_sb_info *sbi, struct erofs_compress *c, char *alg_name, int compression_level, u32 dict_size); diff --git a/lib/compressor_libzstd.c b/lib/compressor_libzstd.c index 223806e..feacb85 100644 --- a/lib/compressor_libzstd.c +++ b/lib/compressor_libzstd.c @@ -8,6 +8,22 @@ #include "compressor.h" #include "erofs/atomic.h" +static int libzstd_compress(const struct erofs_compress *c, + const void *src, unsigned int srcsize, + void *dst, unsigned dstcapacity) +{ + ZSTD_CCtx *cctx = c->private_data; + size_t csize; + + csize = ZSTD_compress2(cctx, dst, dstcapacity, src, srcsize); + if (ZSTD_isError(csize)) { + if (ZSTD_getErrorCode(csize) == 
ZSTD_error_dstSize_tooSmall) + return -ENOSPC; + return -EFAULT; + } + return csize; +} + static int libzstd_compress_destsize(const struct erofs_compress *c, const void *src, unsigned int *srcsize, void *dst, unsigned int dstsize) @@ -139,5 +155,6 @@ const struct erofs_compressor erofs_compressor_libzstd = { .exit = compressor_libzstd_exit, .setlevel = erofs_compressor_libzstd_setlevel, .setdictsize = erofs_compressor_libzstd_setdictsize, + .compress = libzstd_compress, .compress_destsize = libzstd_compress_destsize, }; -- 2.43.5