On 6/1/19, Jun Zhao <mypopy...@gmail.com> wrote: > From: Jun Zhao <barryjz...@tencent.com> > > Used the command for 1080p h264 clip as follow: > > a). ffmpeg -i input -vf lutyuv="u=128:v=128" -f null /dev/null > b). ffmpeg -i input -vf lutrgb="g=0:b=0" -f null /dev/null > > after enabled the slice threading, the fps change from: > > a). 144fps to 258fps (lutyuv) > b). 94fps to 153fps (lutrgb) > > in Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz > > Signed-off-by: Jun Zhao <barryjz...@tencent.com> > --- > libavfilter/vf_lut.c | 310 > ++++++++++++++++++++++++++++++++------------------ > 1 files changed, 197 insertions(+), 113 deletions(-) > > diff --git a/libavfilter/vf_lut.c b/libavfilter/vf_lut.c > index c815ddc..90998e6 100644 > --- a/libavfilter/vf_lut.c > +++ b/libavfilter/vf_lut.c > @@ -337,13 +337,194 @@ static int config_props(AVFilterLink *inlink) > return 0; > } > > +struct thread_data { > + AVFrame *in; > + AVFrame *out; > + > + int w; > + int h; > +}; > + > +#define LOAD_PACKED_COMMON\ > + LutContext *s = ctx->priv;\ > + const struct thread_data *td = arg;\ > +\ > + int i, j;\ > + const int w = td->w;\ > + const int h = td->h;\ > + AVFrame *in = td->in;\ > + AVFrame *out = td->out;\ > + const uint16_t (*tab)[256*256] = (const uint16_t (*)[256*256])s->lut;\ > + const int step = s->step;\ > +\ > + const int slice_start = (h * jobnr ) / nb_jobs;\ > + const int slice_end = (h * (jobnr+1)) / nb_jobs;\ > + > +/* packed, 16-bit */ > +static int lut_packed_16bits(AVFilterContext *ctx, void *arg, int jobnr, > int nb_jobs) > +{ > + LOAD_PACKED_COMMON > + > + uint16_t *inrow, *outrow, *inrow0, *outrow0; > + const int in_linesize = in->linesize[0] / 2; > + const int out_linesize = out->linesize[0] / 2; > + inrow0 = (uint16_t *)in ->data[0]; > + outrow0 = (uint16_t *)out->data[0]; > + > + for (i = slice_start; i < slice_end; i++) { > + inrow = inrow0 + i * in_linesize; > + outrow = outrow0 + i * out_linesize; > + for (j = 0; j < w; j++) { > + > + switch (step) { > +#if 
HAVE_BIGENDIAN > + case 4: outrow[3] = av_bswap16(tab[3][av_bswap16(inrow[3])]); > // Fall-through > + case 3: outrow[2] = av_bswap16(tab[2][av_bswap16(inrow[2])]); > // Fall-through > + case 2: outrow[1] = av_bswap16(tab[1][av_bswap16(inrow[1])]); > // Fall-through > + default: outrow[0] = av_bswap16(tab[0][av_bswap16(inrow[0])]); > +#else > + case 4: outrow[3] = tab[3][inrow[3]]; // Fall-through > + case 3: outrow[2] = tab[2][inrow[2]]; // Fall-through > + case 2: outrow[1] = tab[1][inrow[1]]; // Fall-through > + default: outrow[0] = tab[0][inrow[0]]; > +#endif > + } > + outrow += step; > + inrow += step; > + } > + } > + > + return 0; > +} > + > +/* packed, 8-bit */ > +static int lut_packed_8bits(AVFilterContext *ctx, void *arg, int jobnr, int > nb_jobs) > +{ > + LOAD_PACKED_COMMON > + > + uint8_t *inrow, *outrow, *inrow0, *outrow0; > + const int in_linesize = in->linesize[0]; > + const int out_linesize = out->linesize[0]; > + inrow0 = in ->data[0]; > + outrow0 = out->data[0]; > + > + for (i = slice_start; i < slice_end; i++) { > + inrow = inrow0 + i * in_linesize; > + outrow = outrow0 + i * out_linesize; > + for (j = 0; j < w; j++) { > + switch (step) { > + case 4: outrow[3] = tab[3][inrow[3]]; // Fall-through > + case 3: outrow[2] = tab[2][inrow[2]]; // Fall-through > + case 2: outrow[1] = tab[1][inrow[1]]; // Fall-through > + default: outrow[0] = tab[0][inrow[0]]; > + } > + outrow += step; > + inrow += step; > + } > + } > + > + return 0; > +} > + > +#define LOAD_PLANAR_COMMON\ > + LutContext *s = ctx->priv;\ > + const struct thread_data *td = arg;\ > + int i, j, plane;\ > + AVFrame *in = td->in;\ > + AVFrame *out = td->out;\ > + > +#define PLANAR_COMMON\ > + int vsub = plane == 1 || plane == 2 ? s->vsub : 0;\ > + int hsub = plane == 1 || plane == 2 ? 
s->hsub : 0;\ > + int h = AV_CEIL_RSHIFT(td->h, vsub);\ > + int w = AV_CEIL_RSHIFT(td->w, hsub);\ > + const uint16_t *tab = s->lut[plane];\ > +\ > + const int slice_start = (h * jobnr ) / nb_jobs;\ > + const int slice_end = (h * (jobnr+1)) / nb_jobs;\ > + > +/* planar >8 bit depth */ > +static int lut_planar_16bits(AVFilterContext *ctx, void *arg, int jobnr, > int nb_jobs) > +{ > + LOAD_PLANAR_COMMON > + > + uint16_t *inrow, *outrow; > + > + for (plane = 0; plane < 4 && in->data[plane] && in->linesize[plane]; > plane++) { > + PLANAR_COMMON > + > + const int in_linesize = in->linesize[plane] / 2; > + const int out_linesize = out->linesize[plane] / 2; > + > + inrow = (uint16_t *)(in ->data[plane] + slice_start * > in_linesize); > + outrow = (uint16_t *)(out->data[plane] + slice_start * > out_linesize); > + > + for (i = slice_start; i < slice_end; i++) { > + for (j = 0; j < w; j++) { > +#if HAVE_BIGENDIAN > + outrow[j] = av_bswap16(tab[av_bswap16(inrow[j])]); > +#else > + outrow[j] = tab[inrow[j]]; > +#endif > + } > + inrow += in_linesize; > + outrow += out_linesize; > + } > + } > + > + return 0; > +} > + > +/* planar 8bit depth */ > +static int lut_planar_8bits(AVFilterContext *ctx, void *arg, int jobnr, int > nb_jobs) > +{ > + LOAD_PLANAR_COMMON > + > + uint8_t *inrow, *outrow; > + > + for (plane = 0; plane < 4 && in->data[plane] && in->linesize[plane]; > plane++) { > + PLANAR_COMMON > + > + const int in_linesize = in->linesize[plane]; > + const int out_linesize = out->linesize[plane]; > + > + inrow = in ->data[plane] + slice_start * in_linesize; > + outrow = out->data[plane] + slice_start * out_linesize; > + > + for (i = slice_start; i < slice_end; i++) { > + for (j = 0; j < w; j++) > + outrow[j] = tab[inrow[j]]; > + inrow += in_linesize; > + outrow += out_linesize; > + } > + } > + > + return 0; > +} > + > +#define PACKED_THREAD_DATA\ > + struct thread_data td = {\ > + .in = in,\ > + .out = out,\ > + .w = inlink->w,\ > + .h = in->height,\ > + };\ > + > +#define 
PLANAR_THREAD_DATA\ > + struct thread_data td = {\ > + .in = in,\ > + .out = out,\ > + .w = inlink->w,\ > + .h = inlink->h,\ > + };\ > + > static int filter_frame(AVFilterLink *inlink, AVFrame *in) > { > AVFilterContext *ctx = inlink->dst; > LutContext *s = ctx->priv; > AVFilterLink *outlink = ctx->outputs[0]; > AVFrame *out; > - int i, j, plane, direct = 0; > + int direct = 0; > > if (av_frame_is_writable(in)) { > direct = 1; > @@ -359,121 +540,24 @@ static int filter_frame(AVFilterLink *inlink, AVFrame > *in) > > if (s->is_rgb && s->is_16bit && !s->is_planar) { > /* packed, 16-bit */ > - uint16_t *inrow, *outrow, *inrow0, *outrow0; > - const int w = inlink->w; > - const int h = in->height; > - const uint16_t (*tab)[256*256] = (const uint16_t > (*)[256*256])s->lut; > - const int in_linesize = in->linesize[0] / 2; > - const int out_linesize = out->linesize[0] / 2; > - const int step = s->step; > - > - inrow0 = (uint16_t*) in ->data[0]; > - outrow0 = (uint16_t*) out->data[0]; > - > - for (i = 0; i < h; i ++) { > - inrow = inrow0; > - outrow = outrow0; > - for (j = 0; j < w; j++) { > - > - switch (step) { > -#if HAVE_BIGENDIAN > - case 4: outrow[3] = > av_bswap16(tab[3][av_bswap16(inrow[3])]); // Fall-through > - case 3: outrow[2] = > av_bswap16(tab[2][av_bswap16(inrow[2])]); // Fall-through > - case 2: outrow[1] = > av_bswap16(tab[1][av_bswap16(inrow[1])]); // Fall-through > - default: outrow[0] = > av_bswap16(tab[0][av_bswap16(inrow[0])]); > -#else > - case 4: outrow[3] = tab[3][inrow[3]]; // Fall-through > - case 3: outrow[2] = tab[2][inrow[2]]; // Fall-through > - case 2: outrow[1] = tab[1][inrow[1]]; // Fall-through > - default: outrow[0] = tab[0][inrow[0]]; > -#endif > - } > - outrow += step; > - inrow += step; > - } > - inrow0 += in_linesize; > - outrow0 += out_linesize; > - } > + PACKED_THREAD_DATA > + ctx->internal->execute(ctx, lut_packed_16bits, &td, NULL, > + FFMIN(in->height, > ff_filter_get_nb_threads(ctx))); > } else if (s->is_rgb && !s->is_planar) { > 
- /* packed */ > - uint8_t *inrow, *outrow, *inrow0, *outrow0; > - const int w = inlink->w; > - const int h = in->height; > - const uint16_t (*tab)[256*256] = (const uint16_t > (*)[256*256])s->lut; > - const int in_linesize = in->linesize[0]; > - const int out_linesize = out->linesize[0]; > - const int step = s->step; > - > - inrow0 = in ->data[0]; > - outrow0 = out->data[0]; > - > - for (i = 0; i < h; i ++) { > - inrow = inrow0; > - outrow = outrow0; > - for (j = 0; j < w; j++) { > - switch (step) { > - case 4: outrow[3] = tab[3][inrow[3]]; // Fall-through > - case 3: outrow[2] = tab[2][inrow[2]]; // Fall-through > - case 2: outrow[1] = tab[1][inrow[1]]; // Fall-through > - default: outrow[0] = tab[0][inrow[0]]; > - } > - outrow += step; > - inrow += step; > - } > - inrow0 += in_linesize; > - outrow0 += out_linesize; > - } > + /* packed 8 bits */ > + PACKED_THREAD_DATA > + ctx->internal->execute(ctx, lut_packed_8bits, &td, NULL, > + FFMIN(in->height, > ff_filter_get_nb_threads(ctx))); > } else if (s->is_16bit) { > - // planar >8 bit depth > - uint16_t *inrow, *outrow; > - > - for (plane = 0; plane < 4 && in->data[plane] && > in->linesize[plane]; plane++) { > - int vsub = plane == 1 || plane == 2 ? s->vsub : 0; > - int hsub = plane == 1 || plane == 2 ? 
s->hsub : 0; > - int h = AV_CEIL_RSHIFT(inlink->h, vsub); > - int w = AV_CEIL_RSHIFT(inlink->w, hsub); > - const uint16_t *tab = s->lut[plane]; > - const int in_linesize = in->linesize[plane] / 2; > - const int out_linesize = out->linesize[plane] / 2; > - > - inrow = (uint16_t *)in ->data[plane]; > - outrow = (uint16_t *)out->data[plane]; > - > - for (i = 0; i < h; i++) { > - for (j = 0; j < w; j++) { > -#if HAVE_BIGENDIAN > - outrow[j] = av_bswap16(tab[av_bswap16(inrow[j])]); > -#else > - outrow[j] = tab[inrow[j]]; > -#endif > - } > - inrow += in_linesize; > - outrow += out_linesize; > - } > - } > + /* planar >8 bit depth */ > + PLANAR_THREAD_DATA > + ctx->internal->execute(ctx, lut_planar_16bits, &td, NULL, > + FFMIN(in->height, > ff_filter_get_nb_threads(ctx))); > } else { > /* planar 8bit depth */ > - uint8_t *inrow, *outrow; > - > - for (plane = 0; plane < 4 && in->data[plane] && > in->linesize[plane]; plane++) { > - int vsub = plane == 1 || plane == 2 ? s->vsub : 0; > - int hsub = plane == 1 || plane == 2 ? s->hsub : 0; > - int h = AV_CEIL_RSHIFT(inlink->h, vsub); > - int w = AV_CEIL_RSHIFT(inlink->w, hsub); > - const uint16_t *tab = s->lut[plane]; > - const int in_linesize = in->linesize[plane]; > - const int out_linesize = out->linesize[plane]; > - > - inrow = in ->data[plane]; > - outrow = out->data[plane]; > - > - for (i = 0; i < h; i++) { > - for (j = 0; j < w; j++) > - outrow[j] = tab[inrow[j]]; > - inrow += in_linesize; > - outrow += out_linesize; > - } > - } > + PLANAR_THREAD_DATA > + ctx->internal->execute(ctx, lut_planar_8bits, &td, NULL, > + FFMIN(in->height, > ff_filter_get_nb_threads(ctx))); > } > > if (!direct) > @@ -508,7 +592,7 @@ static const AVFilterPad outputs[] = { > .query_formats = query_formats, \ > .inputs = inputs, \ > .outputs = outputs, \ > - .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, \ > + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | > AVFILTER_FLAG_SLICE_THREADS, \ > } > > #if CONFIG_LUT_FILTER > -- > 1.7.1 >
Should be OK as long as the MD5 hash of the output does not change. > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".