On Thu, Mar 16, 2017 at 11:37 AM, Muhammad Faiz <mfc...@gmail.com> wrote: > benchmark: > sse2 10.670s > avx 8.763s > fma3 8.380s > > Signed-off-by: Muhammad Faiz <mfc...@gmail.com> > --- > libswresample/x86/resample.asm | 15 ++++++++++++--- > libswresample/x86/resample_init.c | 10 ++++++++++ > 2 files changed, 22 insertions(+), 3 deletions(-) > > diff --git a/libswresample/x86/resample.asm b/libswresample/x86/resample.asm > index 4163df1..7107cf9 100644 > --- a/libswresample/x86/resample.asm > +++ b/libswresample/x86/resample.asm > @@ -203,7 +203,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_count, > dst, frac, \ > ; horizontal sum & store > %if mmsize == 32 > vextractf128 xm1, m0, 0x1 > - addps xm0, xm1 > + addp%4 xm0, xm1 > %endif > movhlps xm1, xm0 > %ifidn %1, float > @@ -489,8 +489,8 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, > min_filter_length_x4, filter2, \ > %if mmsize == 32 > vextractf128 xm1, m0, 0x1 > vextractf128 xm3, m2, 0x1 > - addps xm0, xm1 > - addps xm2, xm3 > + addp%4 xm0, xm1 > + addp%4 xm2, xm3 > %endif > cvtsi2s%4 xm1, fracd > subp%4 xm2, xm0 > @@ -608,3 +608,12 @@ RESAMPLE_FNS int16, 2, 1 > > INIT_XMM sse2 > RESAMPLE_FNS double, 8, 3, d, pdbl_1 > + > +%if HAVE_AVX_EXTERNAL > +INIT_YMM avx > +RESAMPLE_FNS double, 8, 3, d, pdbl_1 > +%endif > +%if HAVE_FMA3_EXTERNAL > +INIT_YMM fma3 > +RESAMPLE_FNS double, 8, 3, d, pdbl_1 > +%endif > diff --git a/libswresample/x86/resample_init.c > b/libswresample/x86/resample_init.c > index e515762..c6b2a36 100644 > --- a/libswresample/x86/resample_init.c > +++ b/libswresample/x86/resample_init.c > @@ -42,6 +42,8 @@ RESAMPLE_FUNCS(float, avx); > RESAMPLE_FUNCS(float, fma3); > RESAMPLE_FUNCS(float, fma4); > RESAMPLE_FUNCS(double, sse2); > +RESAMPLE_FUNCS(double, avx); > +RESAMPLE_FUNCS(double, fma3); > > av_cold void swri_resample_dsp_x86_init(ResampleContext *c) > { > @@ -85,6 +87,14 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c) > c->dsp.resample_linear = 
ff_resample_linear_double_sse2; > c->dsp.resample_common = ff_resample_common_double_sse2; > } > + if (EXTERNAL_AVX_FAST(mm_flags)) { > + c->dsp.resample_linear = ff_resample_linear_double_avx; > + c->dsp.resample_common = ff_resample_common_double_avx; > + } > + if (EXTERNAL_FMA3_FAST(mm_flags)) { > + c->dsp.resample_linear = ff_resample_linear_double_fma3; > + c->dsp.resample_common = ff_resample_common_double_fma3; > + } > break; > } > } > -- > 2.9.3 >
Applied. Thanks. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel