On 4/29/18, Michael Niedermayer <mich...@niedermayer.cc> wrote: > On Sat, Apr 28, 2018 at 12:00:46PM +0200, Paul B Mahol wrote: >> Signed-off-by: Paul B Mahol <one...@gmail.com> >> --- >> libavfilter/vf_overlay.c | 281 >> ++++++++++++++++++++++++++++++++--------------- >> 1 file changed, 190 insertions(+), 91 deletions(-) >> >> diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c >> index c6a6ac82f3..cb304e9522 100644 >> --- a/libavfilter/vf_overlay.c >> +++ b/libavfilter/vf_overlay.c >> @@ -40,6 +40,10 @@ >> #include "framesync.h" >> #include "video.h" >> >> +typedef struct ThreadData { >> + AVFrame *dst, *src; >> +} ThreadData; >> + >> static const char *const var_names[] = { >> "main_w", "W", ///< width of the main video >> "main_h", "H", ///< height of the main video >> @@ -124,7 +128,7 @@ typedef struct OverlayContext { >> >> AVExpr *x_pexpr, *y_pexpr; >> >> - void (*blend_image)(AVFilterContext *ctx, AVFrame *dst, const AVFrame >> *src, int x, int y); >> + int (*blend_slice)(AVFilterContext *ctx, void *arg, int jobnr, int >> nb_jobs); >> } OverlayContext; >> >> static av_cold void uninit(AVFilterContext *ctx) >> @@ -403,10 +407,10 @@ static int config_output(AVFilterLink *outlink) >> * Blend image in src to destination buffer dst at position (x, y). 
>> */ >> >> -static av_always_inline void blend_image_packed_rgb(AVFilterContext >> *ctx, >> +static av_always_inline void blend_slice_packed_rgb(AVFilterContext >> *ctx, >> AVFrame *dst, const AVFrame *src, >> int main_has_alpha, int x, int y, >> - int is_straight) >> + int is_straight, int jobnr, int >> nb_jobs) >> { >> OverlayContext *s = ctx->priv; >> int i, imax, j, jmax; >> @@ -425,13 +429,19 @@ static av_always_inline void >> blend_image_packed_rgb(AVFilterContext *ctx, >> const int sb = s->overlay_rgba_map[B]; >> const int sa = s->overlay_rgba_map[A]; >> const int sstep = s->overlay_pix_step[0]; >> + int slice_start, slice_end; >> uint8_t *S, *sp, *d, *dp; >> >> i = FFMAX(-y, 0); >> - sp = src->data[0] + i * src->linesize[0]; >> - dp = dst->data[0] + (y+i) * dst->linesize[0]; >> + imax = FFMIN(-y + dst_h, src_h); >> + >> + slice_start = (imax * jobnr) / nb_jobs; >> + slice_end = (imax * (jobnr+1)) / nb_jobs; >> + >> + sp = src->data[0] + (i + slice_start) * src->linesize[0]; >> + dp = dst->data[0] + (y + i + slice_start) * dst->linesize[0]; >> >> - for (imax = FFMIN(-y + dst_h, src_h); i < imax; i++) { >> + for (i = i + slice_start; i < slice_end; i++) { >> j = FFMAX(-x, 0); >> S = sp + j * sstep; >> d = dp + (x+j) * dstep; >> @@ -495,7 +505,9 @@ static av_always_inline void >> blend_plane(AVFilterContext *ctx, >> int dst_offset, >> int dst_step, >> int straight, >> - int yuv) >> + int yuv, >> + int jobnr, >> + int nb_jobs) >> { >> int src_wp = AV_CEIL_RSHIFT(src_w, hsub); >> int src_hp = AV_CEIL_RSHIFT(src_h, vsub); >> @@ -505,16 +517,22 @@ static av_always_inline void >> blend_plane(AVFilterContext *ctx, >> int xp = x>>hsub; >> uint8_t *s, *sp, *d, *dp, *dap, *a, *da, *ap; >> int jmax, j, k, kmax; >> + int slice_start, slice_end; >> >> j = FFMAX(-yp, 0); >> - sp = src->data[i] + j * src->linesize[i]; >> + jmax = FFMIN(-yp + dst_hp, src_hp); >> + >> + slice_start = (jmax * jobnr) / nb_jobs; >> + slice_end = ((jmax * (jobnr+1)) / nb_jobs); >> + >> + sp = 
src->data[i] + slice_start * src->linesize[i]; >> dp = dst->data[dst_plane] >> - + (yp+j) * dst->linesize[dst_plane] >> + + (yp + slice_start) * dst->linesize[dst_plane] >> + dst_offset; >> - ap = src->data[3] + (j<<vsub) * src->linesize[3]; >> - dap = dst->data[3] + ((yp+j) << vsub) * dst->linesize[3]; >> + ap = src->data[3] + (slice_start << vsub) * src->linesize[3]; >> + dap = dst->data[3] + ((yp + slice_start) << vsub) * >> dst->linesize[3]; >> >> - for (jmax = FFMIN(-yp + dst_hp, src_hp); j < jmax; j++) { >> + for (j = j + slice_start; j < slice_end; j++) { >> k = FFMAX(-xp, 0); >> d = dp + (xp+k) * dst_step; >> s = sp + k; >> @@ -577,17 +595,23 @@ static av_always_inline void >> blend_plane(AVFilterContext *ctx, >> static inline void alpha_composite(const AVFrame *src, const AVFrame >> *dst, >> int src_w, int src_h, >> int dst_w, int dst_h, >> - int x, int y) >> + int x, int y, >> + int jobnr, int nb_jobs) >> { >> uint8_t alpha; ///< the amount of overlay to blend on to >> main >> uint8_t *s, *sa, *d, *da; >> int i, imax, j, jmax; >> + int slice_start, slice_end; >> + >> + imax = FFMIN(-y + dst_h, src_h); >> + slice_start = (imax * jobnr) / nb_jobs; >> + slice_end = ((imax * (jobnr+1)) / nb_jobs); >> >> i = FFMAX(-y, 0); >> - sa = src->data[3] + i * src->linesize[3]; >> - da = dst->data[3] + (y+i) * dst->linesize[3]; >> + sa = src->data[3] + (i + slice_start) * src->linesize[3]; >> + da = dst->data[3] + (y + i + slice_start) * dst->linesize[3]; >> >> - for (imax = FFMIN(-y + dst_h, src_h); i < imax; i++) { > >> + for (i = i + slice_start; i < imax; i++) { > > shouldnt this use slice_end ?
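Yes, the loop condition in alpha_composite() should be `i < slice_end`; with `i < imax` every job would run from its slice start to the end of the overlap region, so the slices overlap. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel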