Signed-off-by: Rostislav Pehlivanov <rpehliva...@obe.tv> --- libavcodec/x86/diracdsp.asm | 47 ++++++++++++++++++++++++++++++++++++++++++ libavcodec/x86/diracdsp_init.c | 6 ++++++ 2 files changed, 53 insertions(+)
diff --git a/libavcodec/x86/diracdsp.asm b/libavcodec/x86/diracdsp.asm index a042413..9db7b67 100644 --- a/libavcodec/x86/diracdsp.asm +++ b/libavcodec/x86/diracdsp.asm @@ -22,6 +22,8 @@ SECTION_RODATA pw_7: times 8 dw 7 +convert_to_unsigned_10bit: times 4 dd 0x200 +clip_10bit: times 8 dw 0x3ff cextern pw_3 cextern pw_16 @@ -172,6 +174,48 @@ cglobal put_signed_rect_clamped_%1, 5,9,3, dst, dst_stride, src, src_stride, w, RET %endm +%macro PUT_RECT_10 0 +; void put_signed_rect_clamped_10(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height) +cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w, h + + neg wq + neg hq + mov r6, srcq + mov r7, dstq + mov r8, wq + pxor m2, m2 + mova m3, [clip_10bit] + mova m4, [convert_to_unsigned_10bit] + + .loop_h: + mov srcq, r6 + mov dstq, r7 + mov wq, r8 + + .loop_w: + movu m0, [srcq+0*mmsize] + movu m1, [srcq+1*mmsize] + + paddd m0, m4 + paddd m1, m4 + packusdw m0, m0, m1 + CLIPW m0, m2, m3 ; packusdw has already saturated negatives to 0; clamp to the 10-bit range + + movu [dstq], m0 + + add srcq, 2*mmsize + add dstq, 1*mmsize + add wq, 8 + jl .loop_w + + add r6, src_strideq + add r7, dst_strideq + add hq, 1 + jl .loop_h + + RET +%endm + %macro ADD_RECT 1 ; void add_rect_clamped(uint8_t *dst, uint16_t *src, int stride, int16_t *idwt, int idwt_stride, int width, int height) cglobal add_rect_clamped_%1, 7,9,3, dst, src, stride, idwt, idwt_stride, w, h @@ -263,3 +307,6 @@ ADD_RECT sse2 HPEL_FILTER sse2 ADD_OBMC 32, sse2 ADD_OBMC 16, sse2 + +INIT_XMM sse4 +PUT_RECT_10 diff --git a/libavcodec/x86/diracdsp_init.c b/libavcodec/x86/diracdsp_init.c index 5fae798..4786eea 100644 --- a/libavcodec/x86/diracdsp_init.c +++ b/libavcodec/x86/diracdsp_init.c @@ -46,6 +46,8 @@ void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); void ff_put_signed_rect_clamped_sse2(uint8_t 
*dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); +void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height); + #if HAVE_YASM #define HPEL_FILTER(MMSIZE, EXT) \ @@ -184,4 +186,8 @@ void ff_diracdsp_init_x86(DiracDSPContext* c) c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2; c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2; } + + if (EXTERNAL_SSE4(mm_flags)) { + c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4; + } } -- 2.8.1.369.geae769a _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel