This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master in repository ffmpeg.

commit 36204fbc3c593f6d13a3dac7a2db448941e3a9e5
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Sun Nov 30 20:49:51 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Mon Dec 8 19:27:51 2025 +0100

    avcodec/vp9itxfm{,_16bpp}: Remove MMXEXT functions overridden by SSSE3

    SSSE3 is already quite old (introduced 2006 for Intel, 2011 for AMD),
    so that the overwhelming majority of our users (particularly those
    that actually update their FFmpeg) will be using the SSSE3 versions.
    This commit therefore removes the MMXEXT functions overridden by them
    (which don't abide by the ABI) to get closer to a removal of emms_c.

    Reviewed-by: Ronald S. Bultje <[email protected]>
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vp9dsp_init.c | 2 --
 libavcodec/x86/vp9dsp_init_16bpp_template.c | 4 ----
 libavcodec/x86/vp9itxfm.asm | 30 +-----------------------------
 libavcodec/x86/vp9itxfm_16bpp.asm | 18 +-----------------
 4 files changed, 2 insertions(+), 52 deletions(-)

diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 85332da2b9..e479fd25ee 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -101,7 +101,6 @@ itxfm_func(iadst, idct, size, opt); \
 itxfm_func(idct, iadst, size, opt); \
 itxfm_func(iadst, iadst, size, opt)
-itxfm_func(idct, idct, 4, mmxext);
 itxfm_func(idct, iadst, 4, sse2);
 itxfm_func(iadst, idct, 4, sse2);
 itxfm_func(iadst, iadst, 4, sse2);
@@ -284,7 +283,6 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
 dsp->loop_filter_8[1][1] = ff_vp9_loop_filter_v_8_8_mmxext;
 init_fpel_func(4, 1, 4, avg, _8, mmxext);
 init_fpel_func(3, 1, 8, avg, _8, mmxext);
- dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext;
 dsp->intra_pred[TX_4X4][HOR_DOWN_PRED] = ff_vp9_ipred_hd_4x4_mmxext;
 dsp->intra_pred[TX_4X4][VERT_LEFT_PRED] = ff_vp9_ipred_vl_4x4_mmxext;
 }
diff --git a/libavcodec/x86/vp9dsp_init_16bpp_template.c b/libavcodec/x86/vp9dsp_init_16bpp_template.c
index 54ff8892cf..969db94d3c 100644
--- a/libavcodec/x86/vp9dsp_init_16bpp_template.c
+++ b/libavcodec/x86/vp9dsp_init_16bpp_template.c
@@ -123,7 +123,6 @@ decl_ipred_fns(tm, BPC, mmxext, sse2);
 decl_itxfm_func(iwht, iwht, 4, BPC, mmxext);
 #if BPC == 10
-decl_itxfm_func(idct, idct, 4, BPC, mmxext);
 decl_itxfm_funcs(4, BPC, ssse3);
 decl_itxfm_funcs(16, BPC, avx512icl);
 decl_itxfm_func(idct, idct, 32, BPC, avx512icl);
@@ -184,9 +183,6 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp, int bitexact)
 init_ipred_func(tm, TM_VP8, 4, BPC, mmxext);
 if (!bitexact) {
 init_itx_func_one(4 /* lossless */, iwht, iwht, 4, BPC, mmxext);
-#if BPC == 10
- init_itx_func(TX_4X4, DCT_DCT, idct, idct, 4, 10, mmxext);
-#endif
 }
 }
diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm
index fe650d519c..bd5966646c 100644
--- a/libavcodec/x86/vp9itxfm.asm
+++ b/libavcodec/x86/vp9itxfm.asm
@@ -223,49 +223,28 @@ cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob
 VP9_STORE_2X 2, 3, 6, 7, 4
 %endmacro
-%macro IDCT_4x4_FN 1
-INIT_MMX %1
+INIT_MMX ssse3
 cglobal vp9_idct_idct_4x4_add, 4, 4, 0, dst, stride, block, eob
-%if cpuflag(ssse3)
 cmp eobd, 4 ; 2x2 or smaller
 jg .idctfull
 cmp eobd, 1 ; faster path for when only DC is set
 jne .idct2x2
-%else
- cmp eobd, 1
- jg .idctfull
-%endif
-%if cpuflag(ssse3)
 movd m0, [blockq]
 mova m5, [pw_11585x2]
 pmulhrsw m0, m5
 pmulhrsw m0, m5
-%else
- DEFINE_ARGS dst, stride, block, coef
- movsx coefd, word [blockq]
- imul coefd, 11585
- add coefd, 8192
- sar coefd, 14
- imul coefd, 11585
- add coefd, (8 << 14) + 8192
- sar coefd, 14 + 4
- movd m0, coefd
-%endif
 pshufw m0, m0, 0
 pxor m4, m4
 movh [blockq], m4
-%if cpuflag(ssse3)
 pmulhrsw m0, [pw_2048] ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4
-%endif
 VP9_STORE_2X 0, 0, 6, 7, 4
 lea dstq, [dstq+2*strideq]
 VP9_STORE_2X 0, 0, 6, 7, 4
 RET
-%if cpuflag(ssse3)
 ; faster path for when only top left 2x2 block is set
 .idct2x2:
 movd m0, [blockq+0]
@@ -285,16 +264,13 @@ cglobal vp9_idct_idct_4x4_add, 4, 4, 0, dst, stride, block, eob
 movh [blockq+ 8], m4
 VP9_IDCT4_WRITEOUT
 RET
-%endif
 .idctfull: ; generic full 4x4 idct/idct
 mova m0, [blockq+ 0]
 mova m1, [blockq+ 8]
 mova m2, [blockq+16]
 mova m3, [blockq+24]
-%if cpuflag(ssse3)
 mova m6, [pw_11585x2]
-%endif
 mova m7, [pd_8192] ; rounding
 VP9_IDCT4_1D
 TRANSPOSE4x4W 0, 1, 2, 3, 4
@@ -306,10 +282,6 @@ cglobal vp9_idct_idct_4x4_add, 4, 4, 0, dst, stride, block, eob
 mova [blockq+24], m4
 VP9_IDCT4_WRITEOUT
 RET
-%endmacro
-
-IDCT_4x4_FN mmxext
-IDCT_4x4_FN ssse3
 ;-------------------------------------------------------------------------------------------
 ; void vp9_iadst_iadst_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
diff --git a/libavcodec/x86/vp9itxfm_16bpp.asm b/libavcodec/x86/vp9itxfm_16bpp.asm
index ebe6222285..161c73f5a1 100644
--- a/libavcodec/x86/vp9itxfm_16bpp.asm
+++ b/libavcodec/x86/vp9itxfm_16bpp.asm
@@ -243,29 +243,21 @@ IWHT4_FN 12, 4095
 ; 4x4 coefficients are 5+depth+sign bits, so for 10bpp, everything still fits
 ; in 15+1 words without additional effort, since the coefficients are 15bpp.
-%macro IDCT4_10_FN 0
+INIT_MMX ssse3
 cglobal vp9_idct_idct_4x4_add_10, 4, 4, 8, dst, stride, block, eob
 cmp eobd, 1
 jg .idctfull
 ; dc-only
 pxor m4, m4
-%if cpuflag(ssse3)
 movd m0, [blockq]
 movd [blockq], m4
 mova m5, [pw_11585x2]
 pmulhrsw m0, m5
 pmulhrsw m0, m5
-%else
- DEFINE_ARGS dst, stride, block, coef
- DC_ONLY 4, m4
- movd m0, coefd
-%endif
 pshufw m0, m0, 0
 mova m5, [pw_1023]
-%if cpuflag(ssse3)
 pmulhrsw m0, [pw_2048] ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4
-%endif
 VP9_STORE_2X 0, 0, 6, 7, 4, 5
 lea dstq, [dstq+2*strideq]
 VP9_STORE_2X 0, 0, 6, 7, 4, 5
@@ -281,9 +273,7 @@ cglobal vp9_idct_idct_4x4_add_10, 4, 4, 8, dst, stride, block, eob
 packssdw m2, [blockq+2*16+8]
 packssdw m3, [blockq+3*16+8]
-%if cpuflag(ssse3)
 mova m6, [pw_11585x2]
-%endif
 mova m7, [pd_8192] ; rounding
 VP9_IDCT4_1D
 TRANSPOSE4x4W 0, 1, 2, 3, 4
@@ -293,12 +283,6 @@ cglobal vp9_idct_idct_4x4_add_10, 4, 4, 8, dst, stride, block, eob
 ZERO_BLOCK blockq, 16, 4, m4
 VP9_IDCT4_WRITEOUT
 RET
-%endmacro
-
-INIT_MMX mmxext
-IDCT4_10_FN
-INIT_MMX ssse3
-IDCT4_10_FN
 %macro IADST4_FN 4
 cglobal vp9_%1_%3_4x4_add_10, 3, 3, 0, dst, stride, block, eob
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]
