---
 libavcodec/tests/x86/dct.c       |  2 ++
 libavcodec/x86/idctdsp_init.c    | 10 ++++++++++
 libavcodec/x86/simple_idct.h     |  3 +++
 libavcodec/x86/simple_idct10.asm |  6 ++++++
 4 files changed, 21 insertions(+)
diff --git a/libavcodec/tests/x86/dct.c b/libavcodec/tests/x86/dct.c index 97116570f4..a9b949f2b1 100644 --- a/libavcodec/tests/x86/dct.c +++ b/libavcodec/tests/x86/dct.c @@ -88,10 +88,12 @@ static const struct algo idct_tab_arch[] = { #if HAVE_YASM #if ARCH_X86_64 #if HAVE_SSE2_EXTERNAL + { "SIMPLE8-SSE2", ff_simple_idct8_sse2, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2}, { "SIMPLE10-SSE2", ff_simple_idct10_sse2, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2}, { "SIMPLE12-SSE2", ff_simple_idct12_sse2, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 }, #endif #if HAVE_AVX_EXTERNAL + { "SIMPLE8-AVX", ff_simple_idct8_avx, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX}, { "SIMPLE10-AVX", ff_simple_idct10_avx, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX}, { "SIMPLE12-AVX", ff_simple_idct12_avx, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX, 1 }, #endif diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c index 82530a5cc4..1e30496da0 100644 --- a/libavcodec/x86/idctdsp_init.c +++ b/libavcodec/x86/idctdsp_init.c @@ -95,6 +95,16 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, c->idct = ff_simple_idct_sse2; c->perm_type = FF_IDCT_PERM_SIMPLE; } + + if (ARCH_X86_64 && + !high_bit_depth && + avctx->lowres == 0 && + (avctx->idct_algo == FF_IDCT_AUTO || + avctx->idct_algo == FF_IDCT_SIMPLEAUTO || + avctx->idct_algo == FF_IDCT_SIMPLE)) { + c->idct = ff_simple_idct8_sse2; + c->perm_type = FF_IDCT_PERM_TRANSPOSE; + } } if (ARCH_X86_64 && avctx->lowres == 0) { diff --git a/libavcodec/x86/simple_idct.h b/libavcodec/x86/simple_idct.h index b19e910372..7a26e96b60 100644 --- a/libavcodec/x86/simple_idct.h +++ b/libavcodec/x86/simple_idct.h @@ -30,6 +30,9 @@ void ff_simple_idct_sse2(int16_t *block); void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block); void ff_simple_idct_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block); +void ff_simple_idct8_sse2(int16_t *block); +void ff_simple_idct8_avx(int16_t *block); + void 
ff_simple_idct10_sse2(int16_t *block); void ff_simple_idct10_avx(int16_t *block); diff --git a/libavcodec/x86/simple_idct10.asm b/libavcodec/x86/simple_idct10.asm index 7cfd33eaa3..b4b47afcee 100644 --- a/libavcodec/x86/simple_idct10.asm +++ b/libavcodec/x86/simple_idct10.asm @@ -33,9 +33,11 @@ cextern pw_2 cextern pw_16 cextern pw_1023 cextern pw_4095 +pd_round_11: times 4 dd 1<<(11-1) pd_round_12: times 4 dd 1<<(12-1) pd_round_15: times 4 dd 1<<(15-1) pd_round_19: times 4 dd 1<<(19-1) +pd_round_20: times 4 dd 1<<(20-1) %macro CONST_DEC 3 const %1 @@ -68,6 +70,10 @@ CONST_DEC w7_min_w5, W7sh2, -W5sh2 SECTION .text %macro idct_fn 0 +cglobal simple_idct8, 1, 1, 16, block + IDCT_FN "", 11, "", 20 + RET + cglobal simple_idct10, 1, 1, 16, block IDCT_FN "", 12, "", 19 RET -- 2.12.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel