Avoids atomic stores and loads and is a prerequisite for removing all atomic synchronizations for VP7. Notice that removing the explicit atomic_store() in vp78_decode_mb_row_sliced() does not negatively affect parallelism during slice-threading, because no check_thread_pos() ever waits for an (mb_x, mb_y) pair with mb_x == 0 (which this atomic store signalled).
Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- Btw: The code in update_pos looks fishy to me; namely the part that tries to avoid the broadcast. Consider the scenario in which the other threads (prev and next, A and B) are not waiting when the current thread C checks their wait_mb_pos. Then one of the other threads reads C's thread_mb_pos and notices that it needs to wait for an update from C. It therefore locks C's mutex, stores its wait_mb_pos, checks C's thread_mb_pos again (still reading the old value in this scenario) and waits via pthread_cond_wait(). Then C updates its thread_mb_pos, but because C uses outdated values for A's and B's wait_mb_pos, it never signals a broadcast. Who will then wake up the waiting thread? This should be fixable by moving the loads after C's update of thread_mb_pos: In case C's read of A's wait_mb_pos value happens before A updates it, then C's update of its thread_mb_pos happens before A updates its wait_mb_pos and A will therefore read C's updated value of thread_mb_pos in its atomic_load while holding C's lock (and will therefore never call pthread_cond_wait()). In case C's read of A's wait_mb_pos value happens after A updates it, C will emit its broadcast, waking A which reads the updated value and stops waiting. 
libavcodec/vp8.c | 30 +++++++++++++++--------------- libavcodec/vp8.h | 4 ++-- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index c259f3588c..5ecb9b07e5 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -2389,11 +2389,11 @@ static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame, #endif static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, - int jobnr, int threadnr, int is_vp7) + int jobnr, int threadnr, int mb_y, + int is_vp7) { VP8Context *s = avctx->priv_data; VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr]; - int mb_y = atomic_load(&td->thread_mb_pos) >> 16; int mb_x, mb_xy = mb_y * s->mb_width; int num_jobs = s->num_jobs; const VP8Frame *prev_frame = s->prev_frame; @@ -2518,23 +2518,24 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void } static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, - int jobnr, int threadnr) + int jobnr, int threadnr, int mb_y) { - return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1); + return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, mb_y, 1); } static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, - int jobnr, int threadnr) + int jobnr, int threadnr, int mb_y) { - return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0); + return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, mb_y, 0); } static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata, - int jobnr, int threadnr, int is_vp7) + int jobnr, int threadnr, int mb_y, + int is_vp7) { VP8Context *s = avctx->priv_data; VP8ThreadData *td = &s->thread_data[threadnr]; - int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs; + int mb_x, num_jobs = s->num_jobs; AVFrame *curframe = s->curframe->tf.f; VP8Macroblock *mb; VP8ThreadData *prev_td, *next_td; @@ -2589,15 +2590,15 @@ static av_always_inline 
void filter_mb_row(AVCodecContext *avctx, void *tdata, } static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata, - int jobnr, int threadnr) + int jobnr, int threadnr, int mb_y) { - filter_mb_row(avctx, tdata, jobnr, threadnr, 1); + filter_mb_row(avctx, tdata, jobnr, threadnr, mb_y, 1); } static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata, - int jobnr, int threadnr) + int jobnr, int threadnr, int mb_y) { - filter_mb_row(avctx, tdata, jobnr, threadnr, 0); + filter_mb_row(avctx, tdata, jobnr, threadnr, mb_y, 0); } static av_always_inline @@ -2615,14 +2616,13 @@ int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr, td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr; td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr; for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) { - atomic_store(&td->thread_mb_pos, mb_y << 16); - ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr); + ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, mb_y); if (ret < 0) { update_pos(td, s->mb_height, INT_MAX & 0xFFFF); return ret; } if (s->deblock_filter) - s->filter_mb_row(avctx, tdata, jobnr, threadnr); + s->filter_mb_row(avctx, tdata, jobnr, threadnr, mb_y); update_pos(td, mb_y, INT_MAX & 0xFFFF); td->mv_bounds.mv_min.y -= 64 * num_jobs; diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h index 30aeb4cb06..ed79bc79c1 100644 --- a/libavcodec/vp8.h +++ b/libavcodec/vp8.h @@ -330,8 +330,8 @@ typedef struct VP8Context { */ int mb_layout; - int (*decode_mb_row_no_filter)(AVCodecContext *avctx, void *tdata, int jobnr, int threadnr); - void (*filter_mb_row)(AVCodecContext *avctx, void *tdata, int jobnr, int threadnr); + int (*decode_mb_row_no_filter)(AVCodecContext *avctx, void *tdata, int jobnr, int threadnr, int mb_y); + void (*filter_mb_row)(AVCodecContext *avctx, void *tdata, int jobnr, int threadnr, int mb_y); int vp7; -- 2.34.1 _______________________________________________ ffmpeg-devel mailing 
list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".