On Sat, Dec 27, 2014 at 11:02:49AM -0500, Ronald S. Bultje wrote: > --- > libavcodec/x86/vp9dsp_init.c | 12 +++------ > libavcodec/x86/vp9lpf.asm | 62 > ++++++++++++++++++++++++++++++++------------ > 2 files changed, 48 insertions(+), 26 deletions(-) > > diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c > index c5decfa..cdbf1e9 100644 > --- a/libavcodec/x86/vp9dsp_init.c > +++ b/libavcodec/x86/vp9dsp_init.c > @@ -355,17 +355,11 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp) > dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \ > dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \ > dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \ > - if (ARCH_X86_64) { \ > - dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \ > - } \ > + dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \ > dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \ > - if (ARCH_X86_64) { \ > - dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \ > - } \ > + dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \ > dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \ > - if (ARCH_X86_64) { \ > - dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \ > - } \ > + dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \ > dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \ > } while (0) > > diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm > index c111f48..b8c75e9 100644 > --- a/libavcodec/x86/vp9lpf.asm > +++ b/libavcodec/x86/vp9lpf.asm > @@ -935,9 +935,12 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + > %5, dst, stride, mstride, > mova m3, [P0] > mova m4, [Q0] > mova m5, [Q1] > +%if ARCH_X86_64 > mova m6, [Q2] > +%endif > mova m7, [Q3] > DEFINE_REAL_P7_TO_Q7 > +%if ARCH_X86_64 > SBUTTERFLY bw, 0, 1, 8 > SBUTTERFLY bw, 2, 3, 8 > SBUTTERFLY bw, 4, 5, 8 > @@ -950,22 
+953,47 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + > %5, dst, stride, mstride, > SBUTTERFLY dq, 1, 5, 8 > SBUTTERFLY dq, 2, 6, 8 > SBUTTERFLY dq, 3, 7, 8 > - movh [P7], m0 > - movhps [P6], m0 > - movh [Q0], m1 > - movhps [Q1], m1 > - movh [P3], m2 > - movhps [P2], m2 > - movh [Q4], m3 > - movhps [Q5], m3 > - movh [P5], m4 > - movhps [P4], m4 > - movh [Q2], m5 > - movhps [Q3], m5 > - movh [P1], m6 > - movhps [P0], m6 > - movh [Q6], m7 > - movhps [Q7], m7 > +%else > + SBUTTERFLY bw, 0, 1, 6 > + mova [rsp+64], m1 > + mova m6, [rsp+96] > + SBUTTERFLY bw, 2, 3, 1 > + SBUTTERFLY bw, 4, 5, 1 > + SBUTTERFLY bw, 6, 7, 1 > + SBUTTERFLY wd, 0, 2, 1 > + mova [rsp+96], m2 > + mova m1, [rsp+64] > + SBUTTERFLY wd, 1, 3, 2 > + SBUTTERFLY wd, 4, 6, 2 > + SBUTTERFLY wd, 5, 7, 2 > + SBUTTERFLY dq, 0, 4, 2 > + SBUTTERFLY dq, 1, 5, 2 > + movh [Q0], m1 > + movhps [Q1], m1 > + mova m2, [rsp+96] > + SBUTTERFLY dq, 2, 6, 1 > + SBUTTERFLY dq, 3, 7, 1 > +%endif > + SWAP 3, 6 > + SWAP 1, 4 > + movh [P7], m0 > + movhps [P6], m0 > + movh [P5], m1 > + movhps [P4], m1 > + movh [P3], m2 > + movhps [P2], m2 > + movh [P1], m3 > + movhps [P0], m3 > +%if ARCH_X86_64 > + movh [Q0], m4 > + movhps [Q1], m4 > +%endif > + movh [Q2], m5 > + movhps [Q3], m5 > + movh [Q4], m6 > + movhps [Q5], m6 > + movh [Q6], m7 > + movhps [Q7], m7 > %endif > %endif > > @@ -975,7 +1003,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + > %5, dst, stride, mstride, > %macro LPF_16_VH 5 > INIT_XMM %5 > LOOPFILTER v, %1, %2, 0, %4 > -%if ARCH_X86_64 || %1 == 44 > +%if ARCH_X86_64 || %1 != 16 > LOOPFILTER h, %1, %2, %3, %4 > %endif > %endmacro
OK.

-- 
Clément B.
pgprrhfZb7gey.pgp
Description: PGP signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel