On 1/18/2018 6:16 PM, James Almer wrote: > On 1/18/2018 6:06 PM, Marton Balint wrote: >> Blend function speedups on x86_64 Core i5 4460: >> >> ffmpeg -f lavfi -i allyuv -vf framerate=60:threads=1 -f null none >> >> C: 447548411 decicycles in Blend, 2048 runs, 0 skips >> SSSE3: 130020087 decicycles in Blend, 2048 runs, 0 skips >> AVX2: 128508221 decicycles in Blend, 2048 runs, 0 skips >> >> ffmpeg -f lavfi -i allyuv -vf format=yuv420p12,framerate=60:threads=1 -f >> null none >> >> C: 228932745 decicycles in Blend, 2048 runs, 0 skips >> SSE4: 123357781 decicycles in Blend, 2048 runs, 0 skips >> AVX2: 121215353 decicycles in Blend, 2048 runs, 0 skips >> >> Signed-off-by: Marton Balint <c...@passwd.hu> >> --- >> libavfilter/vf_framerate.c | 24 ++++++- >> libavfilter/x86/Makefile | 1 + >> libavfilter/x86/vf_framerate.asm | 136 >> +++++++++++++++++++++++++++++++++++++++ >> 3 files changed, 158 insertions(+), 3 deletions(-) >> create mode 100644 libavfilter/x86/vf_framerate.asm >> >> diff --git a/libavfilter/vf_framerate.c b/libavfilter/vf_framerate.c >> index d315ef5d09..6a3b85910f 100644 >> --- a/libavfilter/vf_framerate.c >> +++ b/libavfilter/vf_framerate.c >> @@ -29,11 +29,13 @@ >> #define DEBUG >> >> #include "libavutil/avassert.h" >> +#include "libavutil/cpu.h" >> #include "libavutil/imgutils.h" >> #include "libavutil/internal.h" >> #include "libavutil/opt.h" >> #include "libavutil/pixdesc.h" >> #include "libavutil/pixelutils.h" >> +#include "libavutil/x86/cpu.h" >> >> #include "avfilter.h" >> #include "internal.h" >> @@ -246,7 +248,7 @@ static int blend_frames(AVFilterContext *ctx, int >> interpolate) >> av_frame_copy_props(s->work, s->f0); >> >> ff_dlog(ctx, "blend_frames() INTERPOLATE to create work frame\n"); >> - ctx->internal->execute(ctx, filter_slice, &td, NULL, >> FFMIN(outlink->h, ff_filter_get_nb_threads(ctx))); >> + ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(FFMAX(1, >> outlink->h >> 2), ff_filter_get_nb_threads(ctx))); >> return 1; >> } 
>> return 0; >> @@ -347,6 +349,11 @@ static void blend_frames_c(BLEND_FUNC_PARAMS) >> } >> } >> >> +void ff_blend_frames_ssse3(BLEND_FUNC_PARAMS); >> +void ff_blend_frames_avx2(BLEND_FUNC_PARAMS); >> +void ff_blend_frames16_sse4(BLEND_FUNC_PARAMS); >> +void ff_blend_frames16_avx2(BLEND_FUNC_PARAMS); >> + >> static void blend_frames16_c(BLEND_FUNC_PARAMS) >> { >> int line, pixel; >> @@ -371,6 +378,7 @@ static int config_input(AVFilterLink *inlink) >> AVFilterContext *ctx = inlink->dst; >> FrameRateContext *s = ctx->priv; >> const AVPixFmtDescriptor *pix_desc = >> av_pix_fmt_desc_get(inlink->format); >> + int cpu_flags = av_get_cpu_flags(); >> int plane; >> >> for (plane = 0; plane < 4; plane++) { >> @@ -389,10 +397,20 @@ static int config_input(AVFilterLink *inlink) >> >> if (s->bitdepth == 8) { >> s->blend_factor_max = 1 << BLEND_FACTOR_DEPTH8; >> - s->blend = blend_frames_c; >> + if (ARCH_X86 && EXTERNAL_AVX2_FAST(cpu_flags)) >> + s->blend = ff_blend_frames_avx2; >> + else if (ARCH_X86 && EXTERNAL_SSSE3(cpu_flags)) >> + s->blend = ff_blend_frames_ssse3; >> + else >> + s->blend = blend_frames_c; >> } else { >> s->blend_factor_max = 1 << BLEND_FACTOR_DEPTH16; >> - s->blend = blend_frames16_c; >> + if (ARCH_X86 && EXTERNAL_AVX2_FAST(cpu_flags)) >> + s->blend = ff_blend_frames16_avx2; >> + else if (ARCH_X86 && EXTERNAL_SSE4(cpu_flags)) >> + s->blend = ff_blend_frames16_sse4; >> + else >> + s->blend = blend_frames16_c; > > The simd function pointer initialization and the respective prototypes > should be in a separate file in the x86 folder. In here you should only > have something like > > if (ARCH_X86) > ff_blend_frames_init_x86(s);
On second thought, since this is the framerate filter, a more fitting name would be ff_framerate_init_x86(). Blend may not be the only function in this filter that gets optimized with assembly in the future. > > Then the corresponding pointer initialization goes inside that function. The > prototype for ff_blend_frames_init_x86() should be in a new header. > > See how vf_blend (and many other filters) do it. > >> } >> >> return 0; _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel