To make builds reproducible, write all fragments to disk in a deterministic order that strictly follows the order in which they are committed.
Signed-off-by: Gao Xiang <hsiang...@linux.alibaba.com> --- include/erofs/fragments.h | 9 +- include/erofs/internal.h | 1 + lib/compress.c | 26 ++- lib/fragments.c | 345 ++++++++++++++++++++++++-------------- 4 files changed, 241 insertions(+), 140 deletions(-) diff --git a/include/erofs/fragments.h b/include/erofs/fragments.h index 75f1055..112f002 100644 --- a/include/erofs/fragments.h +++ b/include/erofs/fragments.h @@ -16,11 +16,12 @@ extern const char *erofs_frags_packedname; #define EROFS_PACKED_INODE erofs_frags_packedname u32 z_erofs_fragments_tofh(struct erofs_inode *inode, int fd, erofs_off_t fpos); -int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 tofh); +int erofs_fragment_findmatch(struct erofs_inode *inode, int fd, u32 tofh); -int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc); -int z_erofs_pack_fragments(struct erofs_inode *inode, void *data, - unsigned int len, u32 tofcrc); +int erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofcrc); +int erofs_fragment_pack(struct erofs_inode *inode, void *data, + erofs_off_t pos, erofs_off_t len, u32 tofh, bool tail); +int erofs_fragment_commit(struct erofs_inode *inode, u32 tofh); int erofs_flush_packed_inode(struct erofs_sb_info *sbi); int erofs_packedfile(struct erofs_sb_info *sbi); diff --git a/include/erofs/internal.h b/include/erofs/internal.h index 90bee07..73845f1 100644 --- a/include/erofs/internal.h +++ b/include/erofs/internal.h @@ -272,6 +272,7 @@ struct erofs_inode { union { unsigned int z_idataoff; erofs_off_t fragmentoff; + void *fragment; }; #define z_idata_size idata_size }; diff --git a/lib/compress.c b/lib/compress.c index a260dc4..706a756 100644 --- a/lib/compress.c +++ b/lib/compress.c @@ -526,11 +526,8 @@ static bool z_erofs_fixup_deduped_fragment(struct z_erofs_compress_sctx *ctx) return false; } - inode->fragmentoff += inode->fragment_size - newsize; inode->fragment_size = newsize; - - erofs_dbg("Reducing fragment size to %llu at 
%llu", - inode->fragment_size | 0ULL, inode->fragmentoff | 0ULL); + erofs_dbg("Reducing fragment size to %llu", inode->fragment_size | 0ULL); /* it's the end */ DBG_BUGON(ctx->tail - ctx->head + ctx->remaining != newsize); @@ -625,8 +622,8 @@ nocompression: compressedsize < ctx->pclustersize && (!inode->fragment_size || ictx->fix_dedupedfrag)) { frag_packing: - ret = z_erofs_pack_fragments(inode, ctx->queue + ctx->head, - len, ictx->tofh); + ret = erofs_fragment_pack(inode, ctx->queue + ctx->head, + ~0ULL, len, ictx->tofh, false); if (ret < 0) return ret; e->plen = 0; /* indicate a fragment */ @@ -1103,7 +1100,7 @@ int z_erofs_compress_segment(struct z_erofs_compress_sctx *ctx, DBG_BUGON(offset != -1 && frag && inode->fragment_size); if (offset != -1 && frag && !inode->fragment_size && cfg.c_fragdedupe != FRAGDEDUPE_OFF) { - ret = z_erofs_fragments_dedupe(inode, fd, ictx->tofh); + ret = erofs_fragment_findmatch(inode, fd, ictx->tofh); if (ret < 0) return ret; if (inode->fragment_size > ctx->remaining) @@ -1172,6 +1169,9 @@ int erofs_commit_compressed_file(struct z_erofs_compress_ictx *ictx, int ret; if (inode->fragment_size) { + ret = erofs_fragment_commit(inode, ictx->tofh); + if (ret) + goto err_free_idata; inode->z_advise |= Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; erofs_sb_set_fragments(inode->sbi); } @@ -1210,12 +1210,10 @@ int erofs_commit_compressed_file(struct z_erofs_compress_ictx *ictx, legacymetasize = Z_EROFS_LEGACY_MAP_HEADER_SIZE; } - if (ptotal) { + if (ptotal) (void)erofs_bh_balloon(bh, ptotal); - } else { - if (!cfg.c_fragments && !cfg.c_dedupe) - DBG_BUGON(!inode->idata_size); - } + else if (!cfg.c_fragments && !cfg.c_dedupe) + DBG_BUGON(!inode->idata_size); erofs_info("compressed %s (%llu bytes) into %llu bytes", inode->i_srcpath, inode->i_size | 0ULL, ptotal | 0ULL); @@ -1629,7 +1627,7 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos) * Handle tails in advance to avoid writing duplicated * parts into the packed inode. 
*/ - ret = z_erofs_fragments_dedupe(inode, fd, ictx->tofh); + ret = erofs_fragment_findmatch(inode, fd, ictx->tofh); if (ret < 0) goto err_free_ictx; @@ -1649,7 +1647,7 @@ void *erofs_begin_compressed_file(struct erofs_inode *inode, int fd, u64 fpos) ictx->dedupe = false; if (all_fragments && !inode->fragment_size) { - ret = z_erofs_pack_file_from_fd(inode, fd, ictx->tofh); + ret = erofs_pack_file_from_fd(inode, fd, ictx->tofh); if (ret) goto err_free_idata; } diff --git a/lib/fragments.c b/lib/fragments.c index 9f5f1f9..ce079af 100644 --- a/lib/fragments.c +++ b/lib/fragments.c @@ -20,12 +20,14 @@ #include "erofs/fragments.h" #include "erofs/bitops.h" #include "liberofs_private.h" +#ifdef HAVE_SYS_SENDFILE_H +#include <sys/sendfile.h> +#endif -struct erofs_fragment_dedupe_item { +struct erofs_fragmentitem { struct list_head list; - unsigned int length; - erofs_off_t pos; - u8 data[]; + u8 *data; + erofs_off_t length, pos; }; #define EROFS_FRAGMENT_INMEM_SZ_MAX (256 * 1024) @@ -34,8 +36,15 @@ struct erofs_fragment_dedupe_item { #define FRAGMENT_HASHSIZE 65536 #define FRAGMENT_HASH(c) ((c) & (FRAGMENT_HASHSIZE - 1)) +struct erofs_fragment_bucket { + struct list_head hash; +#ifdef EROFS_MT_ENABLED + pthread_rwlock_t lock; +#endif +}; + struct erofs_packed_inode { - struct list_head *hash; + struct erofs_fragment_bucket *bks; int fd; unsigned long *uptodate; #if EROFS_MT_ENABLED @@ -67,11 +76,49 @@ u32 z_erofs_fragments_tofh(struct erofs_inode *inode, int fd, erofs_off_t fpos) return hash != ~0U ? 
hash : 0; } -int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 tofh) +static erofs_off_t erofs_fragment_longmatch(struct erofs_inode *inode, + struct erofs_fragmentitem *fi, + erofs_off_t matched, int fd) { struct erofs_packed_inode *epi = inode->sbi->packedinode; - struct erofs_fragment_dedupe_item *cur, *di = NULL; - struct list_head *head = &epi->hash[FRAGMENT_HASH(tofh)]; + erofs_off_t pos; + bool inmem = false; + + if (!fi->pos) { + inmem = true; + pos = fi->length - matched; + } else { + pos = fi->pos - matched; + } + + while (matched < inode->i_size && pos) { + char buf[2][16384]; + unsigned int sz; + + sz = min_t(u64, pos, sizeof(buf[0])); + sz = min_t(u64, sz, inode->i_size - matched); + if (pread(fd, buf[0], sz, inode->i_size - matched - sz) != sz) + break; + + if (!inmem) { + if (pread(epi->fd, buf[1], sz, pos - sz) != sz) + break; + if (memcmp(buf[0], buf[1], sz)) + break; + } else if (memcmp(buf[0], fi->data + pos - sz, sz)) { + break; + } + pos -= sz; + matched += sz; + } + return matched; +} + +int erofs_fragment_findmatch(struct erofs_inode *inode, int fd, u32 tofh) +{ + struct erofs_packed_inode *epi = inode->sbi->packedinode; + struct erofs_fragmentitem *cur, *fi = NULL; + struct erofs_fragment_bucket *bk = &epi->bks[FRAGMENT_HASH(tofh)]; unsigned int s1, e1; erofs_off_t deduped; u8 *data; @@ -79,7 +126,7 @@ int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 tofh) if (inode->i_size <= EROFS_TOF_HASHLEN) return 0; - if (list_empty(head)) + if (list_empty(&bk->hash)) return 0; s1 = min_t(u64, EROFS_FRAGMENT_INMEM_SZ_MAX, inode->i_size); @@ -94,13 +141,21 @@ int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 tofh) } e1 = s1 - EROFS_TOF_HASHLEN; deduped = 0; - list_for_each_entry(cur, head, list) { + +#ifdef EROFS_MT_ENABLED + pthread_rwlock_rdlock(&bk->lock); +#endif + list_for_each_entry(cur, &bk->hash, list) { unsigned int e2, mn; - erofs_off_t i, pos; + erofs_off_t inmax, i; DBG_BUGON(cur->length 
<= EROFS_TOF_HASHLEN); - e2 = cur->length - EROFS_TOF_HASHLEN; - + if (cur->pos) + inmax = min_t(u64, cur->length, + EROFS_FRAGMENT_INMEM_SZ_MAX); + else + inmax = cur->length; + e2 = inmax - EROFS_TOF_HASHLEN; if (memcmp(data + e1, cur->data + e2, EROFS_TOF_HASHLEN)) continue; @@ -112,173 +167,212 @@ int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 tofh) i += EROFS_TOF_HASHLEN; if (i >= s1) { /* full short match */ DBG_BUGON(i > s1); - pos = cur->pos + cur->length - s1; - while (i < inode->i_size && pos) { - char buf[2][16384]; - unsigned int sz; - - sz = min_t(u64, pos, sizeof(buf[0])); - sz = min_t(u64, sz, inode->i_size - i); - if (pread(epi->fd, buf[0], sz, pos - sz) != sz) - break; - if (pread(fd, buf[1], sz, - inode->i_size - i - sz) != sz) - break; - - if (memcmp(buf[0], buf[1], sz)) - break; - pos -= sz; - i += sz; - } + i = erofs_fragment_longmatch(inode, cur, s1, fd); } if (i <= deduped) continue; - di = cur; + fi = cur; deduped = i; if (deduped == inode->i_size) break; } - +#ifdef EROFS_MT_ENABLED + pthread_rwlock_unlock(&bk->lock); +#endif free(data); if (deduped) { - DBG_BUGON(!di); + DBG_BUGON(!fi); inode->fragment_size = deduped; - inode->fragmentoff = di->pos + di->length - deduped; - erofs_dbg("Dedupe %llu tail data at %llu", - inode->fragment_size | 0ULL, inode->fragmentoff | 0ULL); + inode->fragment = fi; + erofs_dbg("Dedupe %llu tail data", inode->fragment_size | 0ULL); } return 0; } -static int z_erofs_fragments_dedupe_insert(struct erofs_inode *inode, - void *data, u32 tofh) +int erofs_fragment_pack(struct erofs_inode *inode, void *data, + erofs_off_t pos, erofs_off_t len, u32 tofh, bool tail) { struct erofs_packed_inode *epi = inode->sbi->packedinode; - struct erofs_fragment_dedupe_item *di; - erofs_off_t len = inode->fragment_size; - erofs_off_t pos = inode->fragmentoff; + struct erofs_fragment_bucket *bk = &epi->bks[FRAGMENT_HASH(tofh)]; + struct erofs_fragmentitem *fi; + bool inmem = (pos == ~0ULL); - if (len <= 
EROFS_TOF_HASHLEN) - return 0; - if (len > EROFS_FRAGMENT_INMEM_SZ_MAX) { - data += len - EROFS_FRAGMENT_INMEM_SZ_MAX; - pos += len - EROFS_FRAGMENT_INMEM_SZ_MAX; - len = EROFS_FRAGMENT_INMEM_SZ_MAX; - } - di = malloc(sizeof(*di) + len); - if (!di) + fi = malloc(sizeof(*fi)); + if (!fi) return -ENOMEM; + fi->length = len; + if (!inmem) { + pos += len; + if (len > EROFS_FRAGMENT_INMEM_SZ_MAX) { + if (!tail) + data += len - EROFS_FRAGMENT_INMEM_SZ_MAX; + len = EROFS_FRAGMENT_INMEM_SZ_MAX; + } + } - memcpy(di->data, data, len); - di->pos = pos; - di->length = len; - list_add_tail(&di->list, &epi->hash[FRAGMENT_HASH(tofh)]); + fi->data = malloc(len); + if (!fi->data) { + free(fi); + return -ENOMEM; + } + memcpy(fi->data, data, len); + fi->pos = inmem ? 0 : pos; + if (len > EROFS_TOF_HASHLEN) { + list_add_tail(&fi->list, &bk->hash); + } else { + init_list_head(&fi->list); + } + inode->fragment = fi; + inode->fragment_size = fi->length; + erofs_dbg("Recording %llu fragment data of %s", + fi->length | 0ULL, inode->i_srcpath); return 0; } -int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofh) +int erofs_pack_file_from_fd(struct erofs_inode *inode, int fd, u32 tofh) { struct erofs_packed_inode *epi = inode->sbi->packedinode; - s64 offset, rc; + s64 offset, rc, sz; char *memblock; + bool onheap = false; offset = lseek(epi->fd, 0, SEEK_CUR); if (offset < 0) return -errno; - inode->fragmentoff = (erofs_off_t)offset; - inode->fragment_size = inode->i_size; - memblock = mmap(NULL, inode->i_size, PROT_READ, MAP_SHARED, fd, 0); if (memblock == MAP_FAILED || !memblock) { - unsigned long long remaining = inode->fragment_size; - - memblock = NULL; + erofs_off_t remaining = inode->i_size; + struct erofs_vfile vin = { .fd = fd }; + +#if defined(HAVE_SYS_SENDFILE_H) && defined(HAVE_SENDFILE) + do { + sz = min_t(u64, remaining, UINT_MAX); + rc = sendfile(epi->fd, fd, NULL, sz); + if (rc < 0) + goto out; + remaining -= rc; + } while (remaining); +#endif while 
(remaining) { char buf[32768]; - unsigned int sz = min_t(unsigned int, remaining, - sizeof(buf)); - - rc = read(fd, buf, sz); - if (rc != sz) { - if (rc <= 0) { - if (!rc) - rc = -EIO; - else - rc = -errno; + + sz = min_t(u64, remaining, sizeof(buf)); + rc = erofs_io_read(&vin, buf, sz); + if (rc < 0) + goto out; + if (rc > 0) { + rc = write(epi->fd, buf, rc); + if (rc < 0) goto out; - } - sz = rc; } - rc = __erofs_io_write(epi->fd, buf, sz); - if (rc != sz) { - if (rc >= 0) - rc = -EIO; - goto out; + remaining -= rc; + } + + sz = min_t(u64, inode->i_size, EROFS_FRAGMENT_INMEM_SZ_MAX); + memblock = malloc(sz); + if (!memblock) { + rc = -ENOMEM; + goto out; + } + onheap = true; + + rc = pread(epi->fd, memblock, sz, offset + inode->i_size - sz); + if (rc != sz) { + if (rc >= 0) { + DBG_BUGON(1); + rc = -EIO; } - remaining -= sz; + goto out; } + rc = lseek(fd, 0, SEEK_SET); if (rc < 0) { rc = -errno; goto out; } } else { - rc = __erofs_io_write(epi->fd, memblock, inode->fragment_size); - if (rc != inode->fragment_size) { + rc = __erofs_io_write(epi->fd, memblock, inode->i_size); + if (rc != inode->i_size) { if (rc >= 0) rc = -EIO; goto out; } } - erofs_dbg("Recording %llu fragment data at %llu of %s", - inode->fragment_size | 0ULL, inode->fragmentoff | 0ULL, - inode->i_srcpath); - - if (memblock) - rc = z_erofs_fragments_dedupe_insert(inode, memblock, tofh); - else - rc = 0; + rc = erofs_fragment_pack(inode, memblock, offset, inode->i_size, + tofh, onheap); out: - if (rc) - erofs_err("Failed to record %llu-byte fragment data @ %llu for nid %llu: %d", - inode->fragment_size | 0ULL, - inode->fragmentoff | 0ULL, inode->nid | 0ULL, (int)rc); - if (memblock) + if (onheap) + free(memblock); + else munmap(memblock, inode->i_size); return rc; } -int z_erofs_pack_fragments(struct erofs_inode *inode, void *data, - unsigned int len, u32 tofh) +int erofs_fragment_commit(struct erofs_inode *inode, u32 tofh) { struct erofs_packed_inode *epi = inode->sbi->packedinode; - s64 offset = 
lseek(epi->fd, 0, SEEK_CUR); + struct erofs_fragmentitem *fi = inode->fragment; + erofs_off_t len = inode->fragment_size; + unsigned int sz; + s64 offset; int ret; + if (!len) { + DBG_BUGON(fi); + return 0; + } + + if (fi->pos) { + inode->fragmentoff = fi->pos - len; + return 0; + } + + offset = lseek(epi->fd, 0, SEEK_CUR); if (offset < 0) return -errno; - inode->fragmentoff = (erofs_off_t)offset; - inode->fragment_size = len; - - ret = write(epi->fd, data, len); - if (ret != len) { + ret = write(epi->fd, fi->data, fi->length); + if (ret != fi->length) { if (ret < 0) return -errno; return -EIO; } + offset += fi->length; - erofs_dbg("Recording %llu fragment data at %llu of %s", - inode->fragment_size | 0ULL, inode->fragmentoff | 0ULL, - inode->i_srcpath); + if (!list_empty(&fi->list)) { +#ifdef EROFS_MT_ENABLED + struct erofs_fragment_bucket *bk = &epi->bks[FRAGMENT_HASH(tofh)]; +#endif + void *nb; + + sz = min_t(u64, fi->length, EROFS_FRAGMENT_INMEM_SZ_MAX); +#ifdef EROFS_MT_ENABLED + pthread_rwlock_wrlock(&bk->lock); +#endif + memmove(fi->data, fi->data + fi->length - sz, sz); - ret = z_erofs_fragments_dedupe_insert(inode, data, tofh); - if (ret) - return ret; - return len; + nb = realloc(fi->data, sz); + if (!nb) { +#ifdef EROFS_MT_ENABLED + pthread_rwlock_unlock(&bk->lock); +#endif + fi->data = NULL; + return -ENOMEM; + } + fi->data = nb; + fi->pos = (erofs_off_t)offset; +#ifdef EROFS_MT_ENABLED + pthread_rwlock_unlock(&bk->lock); +#endif + inode->fragmentoff = fi->pos - len; + return 0; + } + inode->fragmentoff = (erofs_off_t)offset - len; + free(fi); + return 0; } int erofs_flush_packed_inode(struct erofs_sb_info *sbi) @@ -306,8 +400,8 @@ int erofs_packedfile(struct erofs_sb_info *sbi) void erofs_packedfile_exit(struct erofs_sb_info *sbi) { struct erofs_packed_inode *epi = sbi->packedinode; - struct erofs_fragment_dedupe_item *di, *n; - int i; + struct erofs_fragmentitem *fi, *n; + struct erofs_fragment_bucket *bk; if (!epi) return; @@ -315,11 +409,14 @@ void 
erofs_packedfile_exit(struct erofs_sb_info *sbi) if (epi->uptodate) free(epi->uptodate); - if (epi->hash) { - for (i = 0; i < FRAGMENT_HASHSIZE; ++i) - list_for_each_entry_safe(di, n, &epi->hash[i], list) - free(di); - free(epi->hash); + if (epi->bks) { + for (bk = epi->bks; bk < &epi->bks[FRAGMENT_HASHSIZE]; ++bk) { + list_for_each_entry_safe(fi, n, &bk->hash, list) { + free(fi->data); + free(fi); + } + } + free(epi->bks); } if (epi->fd >= 0) @@ -342,13 +439,17 @@ int erofs_packedfile_init(struct erofs_sb_info *sbi, bool fragments_mkfs) sbi->packedinode = epi; if (fragments_mkfs) { - epi->hash = malloc(sizeof(*epi->hash) * FRAGMENT_HASHSIZE); - if (!epi->hash) { + epi->bks = malloc(sizeof(*epi->bks) * FRAGMENT_HASHSIZE); + if (!epi->bks) { err = -ENOMEM; goto err_out; } - for (i = 0; i < FRAGMENT_HASHSIZE; ++i) - init_list_head(&epi->hash[i]); + for (i = 0; i < FRAGMENT_HASHSIZE; ++i) { + init_list_head(&epi->bks[i].hash); +#ifdef EROFS_MT_ENABLED + pthread_rwlock_init(&epi->bks[i].lock, NULL); +#endif + } } epi->fd = erofs_tmpfile(); -- 2.43.5