On 1/6/2016 11:54 PM, Hendrik Leppkes wrote: > --- > Based on an idea from Ronald mentioned in an earlier thread about this > function. > > It works and passes FATE, however I'm sure some aspects can be done easier or > cleaner, so please let me know. > > > libavfilter/x86/vf_w3fdif.asm | 37 ++++++++++++++++++++++++++++++++++--- > libavfilter/x86/vf_w3fdif_init.c | 2 +- > 2 files changed, 35 insertions(+), 4 deletions(-) > > diff --git a/libavfilter/x86/vf_w3fdif.asm b/libavfilter/x86/vf_w3fdif.asm > index c3c73ea..35768c3 100644 > --- a/libavfilter/x86/vf_w3fdif.asm > +++ b/libavfilter/x86/vf_w3fdif.asm > @@ -102,14 +102,22 @@ cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, > in_lines_cur0, coef, linesize > REP_RET > > %if ARCH_X86_64 > - > cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, > in_lines_adj0, coef, linesize > +%else > +cglobal w3fdif_simple_high, 4, 7, 8, 0, work_line, in_lines_cur0, > in_lines_adj0, coef, linesize > +%endif > movq m2, [coefq] > - DEFINE_ARGS work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, > linesize, offset, in_lines_cur2, in_lines_adj1, in_lines_adj2 > +%if ARCH_X86_64 > + DEFINE_ARGS work_line, in_lines_cur0, in_lines_adj0, linesize, > offset, in_lines_cur1, in_lines_cur2, in_lines_adj1, in_lines_adj2
This broke x86_64. Leave it as it was above. > + mov offsetq, 0 Since you're moving this take the chance to replace it with a xor. > +%else > + DEFINE_ARGS work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, > in_lines_cur2, in_lines_adj1, in_lines_adj2 > + %define linesized dword r4m Nit: r4mp instead of dword r4m > +%endif > + > pshufd m0, m2, q0000 > SPLATW m2, m2, 2 > pxor m7, m7 > - mov offsetq, 0 > mov in_lines_cur2q, [in_lines_cur0q+gprsize*2] > mov in_lines_cur1q, [in_lines_cur0q+gprsize] > mov in_lines_cur0q, [in_lines_cur0q] > @@ -117,8 +125,21 @@ cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, > in_lines_cur0, in_lines_adj0, > mov in_lines_adj1q, [in_lines_adj0q+gprsize] > mov in_lines_adj0q, [in_lines_adj0q] > > +%if ARCH_X86_32 > + sub in_lines_cur1q, in_lines_cur0q > + sub in_lines_cur2q, in_lines_cur0q > + sub in_lines_adj0q, in_lines_cur0q > + sub in_lines_adj1q, in_lines_cur0q > + sub in_lines_adj2q, in_lines_cur0q > + %define offsetq in_lines_cur0q > +%endif > + > .loop: > +%if ARCH_X86_64 > movh m3, [in_lines_cur0q+offsetq] > +%else > + movh m3, [in_lines_cur0q] > +%endif > movh m4, [in_lines_cur1q+offsetq] > punpcklbw m3, m7 > punpcklbw m4, m7 > @@ -143,15 +164,25 @@ cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, > in_lines_cur0, in_lines_adj0, > pmaddwd m6, m2 > paddd m3, m5 > paddd m4, m6 > +%if ARCH_X86_64 > paddd m3, [work_lineq+offsetq*4] > paddd m4, [work_lineq+offsetq*4+mmsize] > mova [work_lineq+offsetq*4], m3 > mova [work_lineq+offsetq*4+mmsize], m4 > +%else > + paddd m3, [work_lineq] > + paddd m4, [work_lineq+mmsize] > + mova [work_lineq], m3 > + mova [work_lineq+mmsize], m4 > + add work_lineq, mmsize*2 > +%endif > add offsetq, mmsize/2 > sub linesized, mmsize/2 > jg .loop > REP_RET > > +%if ARCH_X86_64 > + > cglobal w3fdif_complex_high, 5, 13, 10, 0, work_line, in_lines_cur0, > in_lines_adj0, coef, linesize > movq m0, [coefq+0] > movd m4, [coefq+8] > diff --git a/libavfilter/x86/vf_w3fdif_init.c > 
b/libavfilter/x86/vf_w3fdif_init.c > index 72ea657..9bf06e8 100644 > --- a/libavfilter/x86/vf_w3fdif_init.c > +++ b/libavfilter/x86/vf_w3fdif_init.c > @@ -51,12 +51,12 @@ av_cold void ff_w3fdif_init_x86(W3FDIFDSPContext *dsp) > > if (EXTERNAL_SSE2(cpu_flags)) { > dsp->filter_simple_low = ff_w3fdif_simple_low_sse2; > + dsp->filter_simple_high = ff_w3fdif_simple_high_sse2; > dsp->filter_complex_low = ff_w3fdif_complex_low_sse2; > dsp->filter_scale = ff_w3fdif_scale_sse2; > } > > if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) { > - dsp->filter_simple_high = ff_w3fdif_simple_high_sse2; > dsp->filter_complex_high = ff_w3fdif_complex_high_sse2; > } > } > Seems to work. Maybe it can be improved but it should be good as is. And to answer your question, no, I wasn't working on this. I got distracted with other filters :P _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel