This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 6e418af810ac58d21d40b3eb0f2c302148b618cb Author: Andreas Rheinhardt <[email protected]> AuthorDate: Sun Nov 30 18:08:38 2025 +0100 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Mon Dec 8 19:27:05 2025 +0100 avcodec/vp9mc: Remove MMXEXT functions overridden by SSSE3 SSSE3 is already quite old (introduced 2006 for Intel, 2011 for AMD), so that the overwhelming majority of our users (particularly those that actually update their FFmpeg) will be using the SSSE3 versions. This commit therefore removes the MMXEXT functions overridden by them (which don't abide by the ABI) to get closer to a removal of emms_c. Reviewed-by: Ronald S. Bultje <[email protected]> Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/x86/vp9dsp_init.c | 20 +++++++++----------- libavcodec/x86/vp9dsp_init.h | 14 ++++++++++---- libavcodec/x86/vp9dsp_init_16bpp_template.c | 8 ++++---- libavcodec/x86/vp9mc.asm | 20 ++++++-------------- 4 files changed, 29 insertions(+), 33 deletions(-) diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c index c103751351..25a007008b 100644 --- a/libavcodec/x86/vp9dsp_init.c +++ b/libavcodec/x86/vp9dsp_init.c @@ -41,7 +41,6 @@ decl_fpel_func(put, 64, , avx); decl_fpel_func(avg, 32, _8, avx2); decl_fpel_func(avg, 64, _8, avx2); -decl_mc_funcs(4, mmxext, int16_t, 8, 8); decl_mc_funcs(8, sse2, int16_t, 8, 8); decl_mc_funcs(4, ssse3, int8_t, 32, 8); decl_mc_funcs(8, ssse3, int8_t, 32, 8); @@ -70,10 +69,11 @@ mc_rep_funcs(64, 32, 32, avx2, int8_t, 32, 8) extern const int8_t ff_filters_ssse3[3][15][4][32]; extern const int16_t ff_filters_sse2[3][15][8][8]; -filters_8tap_2d_fn2(put, 16, 8, 1, mmxext, sse2, sse2) -filters_8tap_2d_fn2(avg, 16, 8, 1, mmxext, sse2, sse2) -filters_8tap_2d_fn2(put, 16, 8, 1, ssse3, ssse3, ssse3) -filters_8tap_2d_fn2(avg, 16, 8, 1, ssse3, ssse3, ssse3) +filters_8tap_2d_fn2(put, 16, 8, 1, sse2, sse2) +filters_8tap_2d_fn2(avg, 16, 8, 1, sse2, sse2) +filters_8tap_2d_fn3(put, 16, 8, 1, ssse3, ssse3) +filters_8tap_2d_fn3(avg, 16, 8, 1, ssse3, ssse3) + #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL filters_8tap_2d_fn(put, 64, 32, 8, 1, avx2, ssse3) filters_8tap_2d_fn(put, 32, 32, 8, 1, avx2, ssse3) @@ -81,10 +81,10 @@ filters_8tap_2d_fn(avg, 64, 32, 8, 1, avx2, ssse3) filters_8tap_2d_fn(avg, 32, 32, 8, 1, avx2, ssse3) #endif -filters_8tap_1d_fn3(put, 8, mmxext, sse2, sse2) -filters_8tap_1d_fn3(avg, 8, mmxext, sse2, sse2) -filters_8tap_1d_fn3(put, 8, ssse3, ssse3, ssse3) -filters_8tap_1d_fn3(avg, 8, ssse3, ssse3, ssse3) +filters_8tap_1d_fn3(put, 8, sse2, sse2) +filters_8tap_1d_fn3(avg, 8, sse2, sse2) +filters_8tap_1d_fn4(put, 8, ssse3, ssse3) +filters_8tap_1d_fn4(avg, 8, ssse3, ssse3) #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL filters_8tap_1d_fn2(put, 64, 8, avx2, ssse3) filters_8tap_1d_fn2(put, 32, 8, avx2, ssse3) @@ -285,8 +285,6 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact) dsp->loop_filter_8[0][1] = ff_vp9_loop_filter_v_4_8_mmxext; dsp->loop_filter_8[1][0] = ff_vp9_loop_filter_h_8_8_mmxext; dsp->loop_filter_8[1][1] = ff_vp9_loop_filter_v_8_8_mmxext; - init_subpel2(4, 0, 4, put, 8, mmxext); - init_subpel2(4, 1, 4, avg, 8, mmxext); init_fpel_func(4, 1, 4, avg, _8, mmxext); init_fpel_func(3, 1, 8, avg, _8, mmxext); dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext; diff --git a/libavcodec/x86/vp9dsp_init.h b/libavcodec/x86/vp9dsp_init.h index 5690d16970..64747173c8 100644 --- a/libavcodec/x86/vp9dsp_init.h +++ b/libavcodec/x86/vp9dsp_init.h @@ -107,12 +107,15 @@ filter_8tap_1d_fn(op, sz, FILTER_8TAP_SMOOTH, f_opt, smooth, dir, dvar, bpp, o filters_8tap_1d_fn(op, sz, h, mx, bpp, opt, f_opt) \ filters_8tap_1d_fn(op, sz, v, my, bpp, opt, f_opt) -#define filters_8tap_1d_fn3(op, bpp, opt4, opt8, f_opt) \ +#define filters_8tap_1d_fn3(op, bpp, opt8, f_opt) \ filters_8tap_1d_fn2(op, 64, bpp, opt8, f_opt) \ filters_8tap_1d_fn2(op, 32, bpp, opt8, f_opt) \ filters_8tap_1d_fn2(op, 16, bpp, opt8, f_opt) \ filters_8tap_1d_fn2(op, 8, bpp, opt8, f_opt) \ -filters_8tap_1d_fn2(op, 4, bpp, opt4, f_opt) + +#define filters_8tap_1d_fn4(op, bpp, opt, f_opt) \ +filters_8tap_1d_fn3(op, bpp, opt, f_opt) \ +filters_8tap_1d_fn2(op, 4, bpp, opt, f_opt) \ #define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, bpp, bytes, opt) \ static void op##_8tap_##fname##_##sz##hv_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \ @@ -133,12 +136,15 @@ filter_8tap_2d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, align, bpp, bytes filter_8tap_2d_fn(op, sz, FILTER_8TAP_SHARP, f_opt, sharp, align, bpp, bytes, opt) \ filter_8tap_2d_fn(op, sz, FILTER_8TAP_SMOOTH, f_opt, smooth, align, bpp, bytes, opt) -#define filters_8tap_2d_fn2(op, align, bpp, bytes, opt4, opt8, f_opt) \ +#define filters_8tap_2d_fn2(op, align, bpp, bytes, opt8, f_opt) \ filters_8tap_2d_fn(op, 64, align, bpp, bytes, opt8, f_opt) \ filters_8tap_2d_fn(op, 32, align, bpp, bytes, opt8, f_opt) \ filters_8tap_2d_fn(op, 16, align, bpp, bytes, opt8, f_opt) \ filters_8tap_2d_fn(op, 8, align, bpp, bytes, opt8, f_opt) \ -filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt4, f_opt) + +#define filters_8tap_2d_fn3(op, align, bpp, bytes, opt, f_opt) \ +filters_8tap_2d_fn2(op, align, bpp, bytes, opt, f_opt) \ +filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt, f_opt) #define init_fpel_func(idx1, idx2, sz, type, bpp, opt) \ dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \ diff --git a/libavcodec/x86/vp9dsp_init_16bpp_template.c b/libavcodec/x86/vp9dsp_init_16bpp_template.c index a6aa03bdc8..54ff8892cf 100644 --- a/libavcodec/x86/vp9dsp_init_16bpp_template.c +++ b/libavcodec/x86/vp9dsp_init_16bpp_template.c @@ -40,8 +40,8 @@ mc_rep_funcs(32, 16, 32, avx2, int16_t, 16, BPC) mc_rep_funcs(64, 32, 64, avx2, int16_t, 16, BPC) #endif -filters_8tap_2d_fn2(put, 16, BPC, 2, sse2, sse2, 16bpp) -filters_8tap_2d_fn2(avg, 16, BPC, 2, sse2, sse2, 16bpp) +filters_8tap_2d_fn3(put, 16, BPC, 2, sse2, 16bpp) +filters_8tap_2d_fn3(avg, 16, BPC, 2, sse2, 16bpp) #if HAVE_AVX2_EXTERNAL filters_8tap_2d_fn(put, 64, 32, BPC, 2, avx2, 16bpp) filters_8tap_2d_fn(avg, 64, 32, BPC, 2, avx2, 16bpp) @@ -51,8 +51,8 @@ filters_8tap_2d_fn(put, 16, 32, BPC, 2, avx2, 16bpp) filters_8tap_2d_fn(avg, 16, 32, BPC, 2, avx2, 16bpp) #endif -filters_8tap_1d_fn3(put, BPC, sse2, sse2, 16bpp) -filters_8tap_1d_fn3(avg, BPC, sse2, sse2, 16bpp) +filters_8tap_1d_fn4(put, BPC, sse2, 16bpp) +filters_8tap_1d_fn4(avg, BPC, sse2, 16bpp) #if HAVE_AVX2_EXTERNAL filters_8tap_1d_fn2(put, 64, BPC, avx2, 16bpp) filters_8tap_1d_fn2(avg, 64, BPC, avx2, 16bpp) diff --git a/libavcodec/x86/vp9mc.asm b/libavcodec/x86/vp9mc.asm index b9a62e79a8..682c6a6ea0 100644 --- a/libavcodec/x86/vp9mc.asm +++ b/libavcodec/x86/vp9mc.asm @@ -205,7 +205,7 @@ cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 15, dst, dstride, src, sstride, h pxor m5, m5 mova m6, [pw_64] mova m7, [filteryq+ 0] -%if ARCH_X86_64 && mmsize > 8 +%if ARCH_X86_64 mova m8, [filteryq+ 16] mova m9, [filteryq+ 32] mova m10, [filteryq+ 48] @@ -226,7 +226,7 @@ cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 15, dst, dstride, src, sstride, h punpcklbw m3, m5 punpcklbw m4, m5 pmullw m0, m7 -%if ARCH_X86_64 && mmsize > 8 +%if ARCH_X86_64 pmullw m1, m8 pmullw m2, m9 pmullw m3, m10 @@ -247,7 +247,7 @@ cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 15, dst, dstride, src, sstride, h punpcklbw m1, m5 punpcklbw m3, m5 punpcklbw m4, m5 -%if ARCH_X86_64 && mmsize > 8 +%if ARCH_X86_64 pmullw m1, m12 pmullw m3, m13 pmullw m4, m14 @@ -276,10 +276,6 @@ cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 15, dst, dstride, src, sstride, h RET %endmacro -INIT_MMX mmxext -filter_sse2_h_fn put -filter_sse2_h_fn avg - INIT_XMM sse2 filter_sse2_h_fn put filter_sse2_h_fn avg @@ -421,7 +417,7 @@ cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 15, dst, dstride, src, sstride, f lea src4q, [srcq+sstrideq] sub srcq, sstride3q mova m7, [filteryq+ 0] -%if ARCH_X86_64 && mmsize > 8 +%ifdef m8 mova m8, [filteryq+ 16] mova m9, [filteryq+ 32] mova m10, [filteryq+ 48] @@ -446,7 +442,7 @@ cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 15, dst, dstride, src, sstride, f punpcklbw m3, m5 punpcklbw m4, m5 pmullw m0, m7 -%if ARCH_X86_64 && mmsize > 8 +%ifdef m8 pmullw m1, m8 pmullw m2, m9 pmullw m3, m10 @@ -467,7 +463,7 @@ cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 15, dst, dstride, src, sstride, f punpcklbw m1, m5 punpcklbw m3, m5 punpcklbw m4, m5 -%if ARCH_X86_64 && mmsize > 8 +%ifdef m8 pmullw m1, m12 pmullw m3, m13 pmullw m4, m14 @@ -496,10 +492,6 @@ cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 15, dst, dstride, src, sstride, f RET %endmacro -INIT_MMX mmxext -filter_sse2_v_fn put -filter_sse2_v_fn avg - INIT_XMM sse2 filter_sse2_v_fn put filter_sse2_v_fn avg _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
