---
 libavcodec/x86/dirac_dwt_10bit.asm    |  3 ++-
 libavcodec/x86/dirac_dwt_init_10bit.c | 13 +++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/libavcodec/x86/dirac_dwt_10bit.asm b/libavcodec/x86/dirac_dwt_10bit.asm
index ae110d2945..2e039e11ea 100644
--- a/libavcodec/x86/dirac_dwt_10bit.asm
+++ b/libavcodec/x86/dirac_dwt_10bit.asm
@@ -25,7 +25,7 @@ SECTION_RODATA
 
 cextern pd_1
 pd_2: times 8 dd 2
-pd_8: times 4 dd 8
+pd_8: times 8 dd 8
 
 SECTION .text
 
@@ -202,6 +202,7 @@ HAAR_HORIZONTAL
 HAAR_VERTICAL
 
 INIT_YMM avx2
+DD97_VERTICAL_HI
 HAAR_HORIZONTAL
 HAAR_VERTICAL
 LEGALL53_VERTICAL_HI
diff --git a/libavcodec/x86/dirac_dwt_init_10bit.c b/libavcodec/x86/dirac_dwt_init_10bit.c
index 51d6eeae93..f103a56176 100644
--- a/libavcodec/x86/dirac_dwt_init_10bit.c
+++ b/libavcodec/x86/dirac_dwt_init_10bit.c
@@ -24,6 +24,7 @@
 #include "libavcodec/dirac_dwt.h"
 
 void ff_dd97_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int32_t *b3, int32_t *b4, int width);
+void ff_dd97_vertical_hi_avx2(int32_t *b0, int32_t *b1, int32_t *b2, int32_t *b3, int32_t *b4, int width);
 
 void ff_legall53_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int width);
 void ff_legall53_vertical_lo_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int width);
@@ -137,7 +138,15 @@ static void dd97_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2,
     ff_dd97_vertical_hi_sse2(b0, b1, b2, b3, b4, i);
     for(; i<width; i++)
         b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]);
+}
 
+static void dd97_vertical_hi_avx2(int32_t *b0, int32_t *b1, int32_t *b2,
+                                  int32_t *b3, int32_t *b4, int width)
+{
+    int i = width & ~7;
+    ff_dd97_vertical_hi_avx2(b0, b1, b2, b3, b4, i);
+    for(; i<width; i++)
+        b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]);
 }
 
 av_cold void ff_spatial_idwt_init_10bit_x86(DWTContext *d, enum dwt_type type)
@@ -179,6 +188,10 @@ av_cold void ff_spatial_idwt_init_10bit_x86(DWTContext *d, enum dwt_type type)
 
     if (EXTERNAL_AVX2(cpu_flags)) {
         switch (type) {
+        case DWT_DIRAC_DD9_7:
+            d->vertical_compose_h0 = (void*)dd97_vertical_hi_avx2;
+            d->vertical_compose_l0 = (void*)legall53_vertical_lo_avx2;
+            break;
         case DWT_DIRAC_LEGALL5_3:
             d->vertical_compose_h0 = (void*)legall53_vertical_hi_avx2;
             d->vertical_compose_l0 = (void*)legall53_vertical_lo_avx2;
-- 
2.17.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel