Currently, for large job counts, pthread_slice.c acquires and releases a lock between consecutive jobs. Acquiring the lock can take more time than the job itself.
The DDS and Hap decoders naively create one job per 4x4 pixel block. For a 4Kx2K frame, decode time before the patch is 1562ms and after the patch is 14ms. Clients should probably be able to submit jobs without having to consider the number of threads the jobs will run on, and this patch is one possible solution. It assumes that all jobs take roughly the same amount of time and that threads are scheduled evenly. --- libavcodec/pthread_slice.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/libavcodec/pthread_slice.c b/libavcodec/pthread_slice.c index c8e69f0..751e5f4 100644 --- a/libavcodec/pthread_slice.c +++ b/libavcodec/pthread_slice.c @@ -50,9 +50,11 @@ typedef struct SliceThreadContext { action_func2 *func2; void *args; int *rets; + int *subjob_limits; + int *subjob_offsets; int rets_count; int job_count; - int job_size; + int subjob_size; pthread_cond_t last_job_cond; pthread_cond_t current_job_cond; @@ -76,6 +78,7 @@ static void* attribute_align_arg worker(void *v) int our_job = c->job_count; int thread_count = avctx->thread_count; int self_id; + int i; pthread_mutex_lock(&c->current_job_lock); self_id = c->current_job++; @@ -96,8 +99,10 @@ static void* attribute_align_arg worker(void *v) } pthread_mutex_unlock(&c->current_job_lock); - c->rets[our_job%c->rets_count] = c->func ? c->func(avctx, (char*)c->args + our_job*c->job_size): - c->func2(avctx, c->args, our_job, self_id); + for (i = c->subjob_offsets[our_job]; i < c->subjob_limits[our_job]; i++) { + c->rets[i%c->rets_count] = c->func ? 
c->func(avctx, (char*)c->args + i*c->subjob_size): + c->func2(avctx, c->args, i, self_id); + } pthread_mutex_lock(&c->current_job_lock); our_job = c->current_job++; @@ -133,6 +138,8 @@ void ff_slice_thread_free(AVCodecContext *avctx) av_freep(&c->progress_cond); av_freep(&c->workers); + av_freep(&c->subjob_limits); + av_freep(&c->subjob_offsets); av_freep(&avctx->internal->thread_ctx); } @@ -146,7 +153,7 @@ static av_always_inline void thread_park_workers(SliceThreadContext *c, int thre static int thread_execute(AVCodecContext *avctx, action_func* func, void *arg, int *ret, int job_count, int job_size) { SliceThreadContext *c = avctx->internal->thread_ctx; - int dummy_ret; + int dummy_ret, i; if (!(avctx->active_thread_type&FF_THREAD_SLICE) || avctx->thread_count <= 1) return avcodec_default_execute(avctx, func, arg, ret, job_count, job_size); @@ -157,8 +164,14 @@ static int thread_execute(AVCodecContext *avctx, action_func* func, void *arg, i pthread_mutex_lock(&c->current_job_lock); c->current_job = avctx->thread_count; - c->job_count = job_count; - c->job_size = job_size; + c->job_count = FFMIN(job_count, avctx->thread_count); + c->subjob_offsets[0] = 0; + c->subjob_limits[0] = (job_count / c->job_count) + (job_count % c->job_count); + for (i = 1; i < c->job_count; i++) { + c->subjob_offsets[i] = c->subjob_limits[i-1]; + c->subjob_limits[i] = c->subjob_offsets[i] + (job_count / c->job_count); + } + c->subjob_size = job_size; c->args = arg; c->func = func; if (ret) { @@ -218,17 +231,29 @@ int ff_slice_thread_init(AVCodecContext *avctx) av_free(c); return -1; } + c->subjob_offsets = av_mallocz_array(thread_count, sizeof(int)); + if (!c->subjob_offsets) { + av_free(c); + return -1; + } + c->subjob_limits = av_mallocz_array(thread_count, sizeof(int)); + if (!c->subjob_limits) { + av_free(c); + return -1; + } + avctx->internal->thread_ctx = c; c->current_job = 0; c->job_count = 0; - c->job_size = 0; + c->subjob_size = 0; c->done = 0; 
pthread_cond_init(&c->current_job_cond, NULL); pthread_cond_init(&c->last_job_cond, NULL); pthread_mutex_init(&c->current_job_lock, NULL); pthread_mutex_lock(&c->current_job_lock); for (i=0; i<thread_count; i++) { + c->subjob_limits[i] = c->subjob_offsets[i] = 0; if(pthread_create(&c->workers[i], NULL, worker, avctx)) { avctx->thread_count = i; pthread_mutex_unlock(&c->current_job_lock); -- 2.3.2 (Apple Git-55) _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel