On 20 July 2016 at 02:40, James Almer <jamr...@gmail.com> wrote: > Signed-off-by: James Almer <jamr...@gmail.com> > --- > libavcodec/x86/diracdsp.asm | 37 ++++++++++++++++++++----------------- > libavcodec/x86/diracdsp_init.c | 4 ---- > 2 files changed, 20 insertions(+), 21 deletions(-) > > diff --git a/libavcodec/x86/diracdsp.asm b/libavcodec/x86/diracdsp.asm > index d86b543..6b3f780 100644 > --- a/libavcodec/x86/diracdsp.asm > +++ b/libavcodec/x86/diracdsp.asm > @@ -303,24 +303,30 @@ cglobal dequant_subband_32, 7, 7, 4, src, dst, > stride, qf, qs, tot_v, tot_h > > RET > > -%if ARCH_X86_64 == 1 > +INIT_XMM sse4 > ; void put_signed_rect_clamped_10(uint8_t *dst, int dst_stride, const > uint8_t *src, int src_stride, int width, int height) > -cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, > src_stride, w, h > - mov r6, srcq > - mov r7, dstq > - mov r8, wq > +%if ARCH_X86_64 > +cglobal put_signed_rect_clamped_10, 6, 8, 5, dst, dst_stride, src, > src_stride, w, h, t1, t2 > +%else > +cglobal put_signed_rect_clamped_10, 5, 7, 5, dst, dst_stride, src, > src_stride, w, t1, t2 > + %define hd r5mp > +%endif > + shl wd, 2 > + add srcq, wq > + neg wq > + mov t2q, dstq > + mov t1q, wq > pxor m2, m2 > mova m3, [clip_10bit] > mova m4, [convert_to_unsigned_10bit] > > .loop_h: > - mov srcq, r6 > - mov dstq, r7 > - mov wq, r8 > + mov dstq, t2q > + mov wq, t1q > > .loop_w: > - movu m0, [srcq+0*mmsize] > - movu m1, [srcq+1*mmsize] > + movu m0, [srcq+wq+0*mmsize] > + movu m1, [srcq+wq+1*mmsize] > > paddd m0, m4 > paddd m1, m4 > @@ -329,16 +335,13 @@ cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, > dst_stride, src, src_stride, w > > movu [dstq], m0 > > - add srcq, 2*mmsize > add dstq, 1*mmsize > - sub wd, 8 > - jg .loop_w > + add wq, 2*mmsize > + jl .loop_w > > - add r6, src_strideq > - add r7, dst_strideq > + add srcq, src_strideq > + add t2q, dst_strideq > sub hd, 1 > jg .loop_h > > RET > - > -%endif > diff --git a/libavcodec/x86/diracdsp_init.c > 
b/libavcodec/x86/diracdsp_init.c > index d7c7cd1..b195113 100644 > --- a/libavcodec/x86/diracdsp_init.c > +++ b/libavcodec/x86/diracdsp_init.c > @@ -45,9 +45,7 @@ void ff_put_rect_clamped_mmx(uint8_t *dst, int > dst_stride, const int16_t *src, i > void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t > *src, int src_stride, int width, int height); > void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const > int16_t *src, int src_stride, int width, int height); > void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const > int16_t *src, int src_stride, int width, int height); > -#if ARCH_X86_64 > void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, > const uint8_t *src, int src_stride, int width, int height); > -#endif > > void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t > stride, const int qf, const int qs, int tot_v, int tot_h); > > @@ -192,8 +190,6 @@ void ff_diracdsp_init_x86(DiracDSPContext* c) > > if (EXTERNAL_SSE4(mm_flags)) { > c->dequant_subband[1] = ff_dequant_subband_32_sse4; > -#if ARCH_X86_64 > c->put_signed_rect_clamped[1] = > ff_put_signed_rect_clamped_10_sse4; > -#endif > } > } > -- > 2.9.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel >
Very nice, thanks. Push whenever you have the time. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel