The previous limit of 256 was too small, especially when in-flight files are large: the job queue fills up while some compression workers remain idle.
Increase the default to 32768 and add the `--async-queue-limit` option to manually adjust this value. Signed-off-by: Gao Xiang <hsiang...@linux.alibaba.com> --- include/erofs/config.h | 9 +++++---- lib/inode.c | 46 +++++++++++++++++++++++------------------- mkfs/main.c | 15 ++++++++++++++ 3 files changed, 45 insertions(+), 25 deletions(-) diff --git a/include/erofs/config.h b/include/erofs/config.h index eb5cad8..e4d2bb3 100644 --- a/include/erofs/config.h +++ b/include/erofs/config.h @@ -81,10 +81,6 @@ struct erofs_configure { char c_force_chunkformat; /* < 0, xattr disabled and INT_MAX, always use inline xattrs */ int c_inline_xattr_tolerance; -#ifdef EROFS_MT_ENABLED - u64 c_mkfs_segment_size; - u32 c_mt_workers; -#endif u32 c_mkfs_pclustersize_max; u32 c_mkfs_pclustersize_def; u32 c_mkfs_pclustersize_packed; @@ -94,6 +90,11 @@ struct erofs_configure { const char *mount_point; long long c_uid_offset, c_gid_offset; u32 c_root_xattr_isize; +#ifdef EROFS_MT_ENABLED + u64 c_mkfs_segment_size; + u32 c_mt_workers; + u32 c_mt_async_queue_limit; +#endif #ifdef WITH_ANDROID char *target_out_path; char *fs_config_file; diff --git a/lib/inode.c b/lib/inode.c index 6598650..b53f0b6 100644 --- a/lib/inode.c +++ b/lib/inode.c @@ -1301,6 +1301,7 @@ out: inode->i_diskbuf = NULL; inode->datasource = EROFS_INODE_DATA_SOURCE_NONE; } else { + DBG_BUGON(ctx->fd < 0); close(ctx->fd); } return ret; @@ -1360,6 +1361,9 @@ static int erofs_mkfs_jobfn(struct erofs_mkfs_jobitem *item) struct erofs_inode *inode = item->u.inode; int ret; + if (item->type >= EROFS_MKFS_JOB_MAX) + return 1; + if (item->type == EROFS_MKFS_JOB_NDIR) return erofs_mkfs_handle_nondirectory(&item->u.ndir); @@ -1394,8 +1398,6 @@ struct erofs_mkfs_dfops { bool idle; /* initialize as false before the dfops worker runs */ }; -#define EROFS_MT_QUEUE_SIZE 256 - static void erofs_mkfs_flushjobs(struct erofs_sb_info *sbi) { struct erofs_mkfs_dfops *q = sbi->mkfs_dfops; @@ -1406,7 +1408,7 @@ static void 
erofs_mkfs_flushjobs(struct erofs_sb_info *sbi) pthread_mutex_unlock(&q->lock); } -static void *erofs_mkfs_pop_jobitem(struct erofs_mkfs_dfops *q) +static void *erofs_mkfs_top_jobitem(struct erofs_mkfs_dfops *q) { struct erofs_mkfs_jobitem *item; @@ -1417,31 +1419,34 @@ static void *erofs_mkfs_pop_jobitem(struct erofs_mkfs_dfops *q) pthread_cond_signal(&q->drain); pthread_cond_wait(&q->empty, &q->lock); } + item = q->queue + (q->head & (q->entries - 1)); + pthread_mutex_unlock(&q->lock); + return item; +} - item = q->queue + q->head; - q->head = (q->head + 1) & (q->entries - 1); - +static void erofs_mkfs_pop_jobitem(struct erofs_mkfs_dfops *q) +{ + pthread_mutex_lock(&q->lock); + DBG_BUGON(q->head == q->tail); + ++q->head; pthread_cond_signal(&q->full); pthread_mutex_unlock(&q->lock); - return item; } static void *z_erofs_mt_dfops_worker(void *arg) { struct erofs_sb_info *sbi = arg; - int ret = 0; + struct erofs_mkfs_dfops *dfops = sbi->mkfs_dfops; + int ret; - while (1) { + do { struct erofs_mkfs_jobitem *item; - item = erofs_mkfs_pop_jobitem(sbi->mkfs_dfops); - if (item->type >= EROFS_MKFS_JOB_MAX) - break; + item = erofs_mkfs_top_jobitem(dfops); ret = erofs_mkfs_jobfn(item); - if (ret) - break; - } - pthread_exit((void *)(uintptr_t)ret); + erofs_mkfs_pop_jobitem(dfops); + } while (!ret); + pthread_exit((void *)(uintptr_t)(ret < 0 ? 
ret : 0)); } static int erofs_mkfs_go(struct erofs_sb_info *sbi, @@ -1452,14 +1457,13 @@ static int erofs_mkfs_go(struct erofs_sb_info *sbi, pthread_mutex_lock(&q->lock); - while (((q->tail + 1) & (q->entries - 1)) == q->head) + while (q->tail - q->head >= q->entries) pthread_cond_wait(&q->full, &q->lock); - item = q->queue + q->tail; + item = q->queue + (q->tail++ & (q->entries - 1)); item->type = type; if (size) memcpy(&item->u, elem, size); - q->tail = (q->tail + 1) & (q->entries - 1); q->idle = false; pthread_cond_signal(&q->empty); @@ -1665,7 +1669,7 @@ static int erofs_mkfs_handle_inode(struct erofs_inode *inode) return ret; if (!S_ISDIR(inode->i_mode)) { - struct erofs_mkfs_job_ndir_ctx ctx = { .inode = inode }; + struct erofs_mkfs_job_ndir_ctx ctx = { .inode = inode, .fd = -1 }; if (!S_ISLNK(inode->i_mode) && inode->i_size) { ctx.fd = open(inode->i_srcpath, O_RDONLY | O_BINARY); @@ -1897,7 +1901,7 @@ static int erofs_mkfs_build_tree(struct erofs_mkfs_buildtree_ctx *ctx) if (!q) return -ENOMEM; - q->entries = EROFS_MT_QUEUE_SIZE; + q->entries = cfg.c_mt_async_queue_limit ?: 32768; q->queue = malloc(q->entries * sizeof(*q->queue)); if (!q->queue) { free(q); diff --git a/mkfs/main.c b/mkfs/main.c index 8497637..d1b6073 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -86,6 +86,9 @@ static struct option long_options[] = { {"sort", required_argument, NULL, 527}, {"hard-dereference", no_argument, NULL, 528}, {"dsunit", required_argument, NULL, 529}, +#ifdef EROFS_MT_ENABLED + {"async-queue-limit", required_argument, NULL, 530}, +#endif {0, 0, 0, 0}, }; @@ -159,6 +162,9 @@ static void usage(int argc, char **argv) " --mkfs-time the timestamp is applied as build time only\n" " -UX use a given filesystem UUID\n" " --all-root make all files owned by root\n" +#ifdef EROFS_MT_ENABLED + " --async-queue-limit=# specify the maximum number of entries in the multi-threaded job queue\n" +#endif " --blobdev=X specify an extra device X to store chunked data\n" " --chunksize=# 
generate chunk-based files with #-byte chunks\n" " --clean=X run full clean build (default) or:\n" @@ -905,6 +911,15 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) return -EINVAL; } break; +#ifdef EROFS_MT_ENABLED + case 530: + cfg.c_mt_async_queue_limit = strtoul(optarg, &endptr, 0); + if (*endptr != '\0') { + erofs_err("invalid async-queue-limit %s", optarg); + return -EINVAL; + } + break; +#endif case 'V': version(); exit(0); -- 2.43.5