This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit ea37f49aedc924ee424f666174cf7fc6e9b3162d Author: Andreas Rheinhardt <[email protected]> AuthorDate: Sun Nov 30 20:26:44 2025 +0100 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Mon Dec 8 19:27:44 2025 +0100 avcodec/vp9intrapred: Remove MMXEXT functions overridden by SSSE3 SSSE3 is already quite old (introduced 2006 for Intel, 2011 for AMD), so that the overwhelming majority of our users (particularly those that actually update their FFmpeg) will be using the SSSE3 versions. This commit therefore removes the MMXEXT functions overridden by them (which don't abide by the ABI) to get closer to a removal of emms_c. Reviewed-by: Ronald S. Bultje <[email protected]> Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/x86/vp9dsp_init.c | 12 ++-- libavcodec/x86/vp9intrapred.asm | 122 +++------------------------------------- 2 files changed, 13 insertions(+), 121 deletions(-) diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c index 25a007008b..85332da2b9 100644 --- a/libavcodec/x86/vp9dsp_init.c +++ b/libavcodec/x86/vp9dsp_init.c @@ -154,6 +154,8 @@ lpf_funcs(88, 16, avx); void ff_vp9_ipred_##type##_##size##x##size##_##opt(uint8_t *dst, ptrdiff_t stride, \ const uint8_t *l, const uint8_t *a) +ipred_func(4, hd, mmxext); +ipred_func(4, vl, mmxext); ipred_func(8, v, mmx); #define ipred_dc_funcs(size, opt) \ @@ -161,9 +163,6 @@ ipred_func(size, dc, opt); \ ipred_func(size, dc_left, opt); \ ipred_func(size, dc_top, opt) -ipred_dc_funcs(4, mmxext); -ipred_dc_funcs(8, mmxext); - #define ipred_dir_tm_funcs(size, opt) \ ipred_func(size, tm, opt); \ ipred_func(size, dl, opt); \ @@ -173,8 +172,6 @@ ipred_func(size, hu, opt); \ ipred_func(size, vl, opt); \ ipred_func(size, vr, opt) -ipred_dir_tm_funcs(4, mmxext); - ipred_func(16, v, sse); ipred_func(32, v, sse); @@ -288,9 +285,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact) init_fpel_func(4, 1, 4, avg, _8, mmxext); init_fpel_func(3, 1, 8, avg, _8, mmxext); dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext; - init_dc_ipred(4, mmxext); - init_dc_ipred(8, mmxext); - init_dir_tm_ipred(4, mmxext); + dsp->intra_pred[TX_4X4][HOR_DOWN_PRED] = ff_vp9_ipred_hd_4x4_mmxext; + dsp->intra_pred[TX_4X4][VERT_LEFT_PRED] = ff_vp9_ipred_vl_4x4_mmxext; } if (EXTERNAL_SSE(cpu_flags)) { diff --git a/libavcodec/x86/vp9intrapred.asm b/libavcodec/x86/vp9intrapred.asm index b67addd7e3..22390ca831 100644 --- a/libavcodec/x86/vp9intrapred.asm +++ b/libavcodec/x86/vp9intrapred.asm @@ -93,21 +93,14 @@ SECTION .text ; dc_NxN(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a) -%macro DC_4to8_FUNCS 0 +INIT_MMX ssse3 cglobal vp9_ipred_dc_4x4, 4, 4, 0, dst, stride, l, a movd m0, [lq] punpckldq m0, [aq] pxor m1, m1 psadbw m0, m1 -%if cpuflag(ssse3) pmulhrsw m0, [pw_4096] pshufb m0, m1 -%else - paddw m0, [pw_4] - psraw m0, 3 - punpcklbw m0, m0 - pshufw m0, m0, q0000 -%endif movd [dstq+strideq*0], m0 movd [dstq+strideq*1], m0 lea dstq, [dstq+strideq*2] @@ -124,15 +117,8 @@ cglobal vp9_ipred_dc_8x8, 4, 4, 0, dst, stride, l, a psadbw m0, m2 psadbw m1, m2 paddw m0, m1 -%if cpuflag(ssse3) pmulhrsw m0, [pw_2048] pshufb m0, m2 -%else - paddw m0, [pw_8] - psraw m0, 4 - punpcklbw m0, m0 - pshufw m0, m0, q0000 -%endif movq [dstq+strideq*0], m0 movq [dstq+strideq*1], m0 movq [dstq+strideq*2], m0 @@ -143,12 +129,7 @@ cglobal vp9_ipred_dc_8x8, 4, 4, 0, dst, stride, l, a movq [dstq+strideq*2], m0 movq [dstq+stride3q ], m0 RET -%endmacro -INIT_MMX mmxext -DC_4to8_FUNCS -INIT_MMX ssse3 -DC_4to8_FUNCS %macro DC_16to32_FUNCS 0 cglobal vp9_ipred_dc_16x16, 4, 4, 3, dst, stride, l, a @@ -238,15 +219,8 @@ cglobal vp9_ipred_dc_%1_4x4, 4, 4, 0, dst, stride, l, a movd m0, [%2q] pxor m1, m1 psadbw m0, m1 -%if cpuflag(ssse3) pmulhrsw m0, [pw_8192] pshufb m0, m1 -%else - paddw m0, [pw_2] - psraw m0, 2 - punpcklbw m0, m0 - pshufw m0, m0, q0000 -%endif movd [dstq+strideq*0], m0 movd [dstq+strideq*1], m0 lea dstq, [dstq+strideq*2] @@ -260,15 +234,8 @@ cglobal vp9_ipred_dc_%1_8x8, 4, 4, 0, dst, stride, l, a lea stride3q, [strideq*3] pxor m1, m1 psadbw m0, m1 -%if cpuflag(ssse3) pmulhrsw m0, [pw_4096] pshufb m0, m1 -%else - paddw m0, [pw_4] - psraw m0, 3 - punpcklbw m0, m0 - pshufw m0, m0, q0000 -%endif movq [dstq+strideq*0], m0 movq [dstq+strideq*1], m0 movq [dstq+strideq*2], m0 @@ -281,9 +248,6 @@ cglobal vp9_ipred_dc_%1_8x8, 4, 4, 0, dst, stride, l, a RET %endmacro -INIT_MMX mmxext -DC_1D_4to8_FUNCS top, a -DC_1D_4to8_FUNCS left, l INIT_MMX ssse3 DC_1D_4to8_FUNCS top, a DC_1D_4to8_FUNCS left, l @@ -548,33 +512,22 @@ H_XMM_FUNCS 4, 8 INIT_XMM avx H_XMM_FUNCS 4, 8 -%macro TM_MMX_FUNCS 0 +INIT_MMX ssse3 cglobal vp9_ipred_tm_4x4, 4, 4, 0, dst, stride, l, a pxor m1, m1 movd m0, [aq] pinsrw m2, [aq-1], 0 punpcklbw m0, m1 DEFINE_ARGS dst, stride, l, cnt -%if cpuflag(ssse3) mova m3, [pw_m256] mova m1, [pw_m255] pshufb m2, m3 -%else - punpcklbw m2, m1 - pshufw m2, m2, q0000 -%endif psubw m0, m2 mov cntq, 1 .loop: pinsrw m2, [lq+cntq*2], 0 -%if cpuflag(ssse3) pshufb m4, m2, m1 pshufb m2, m3 -%else - punpcklbw m2, m1 - pshufw m4, m2, q1111 - pshufw m2, m2, q0000 -%endif paddw m4, m0 paddw m2, m0 packuswb m4, m4 @@ -585,12 +538,6 @@ cglobal vp9_ipred_tm_4x4, 4, 4, 0, dst, stride, l, a dec cntq jge .loop RET -%endmacro - -INIT_MMX mmxext -TM_MMX_FUNCS -INIT_MMX ssse3 -TM_MMX_FUNCS %macro TM_XMM_FUNCS 0 cglobal vp9_ipred_tm_8x8, 4, 4, 5, dst, stride, l, a @@ -784,20 +731,11 @@ TM_XMM_FUNCS pavgb m%1, m%2 %endmacro -%macro DL_MMX_FUNCS 0 +INIT_MMX ssse3 cglobal vp9_ipred_dl_4x4, 4, 4, 0, dst, stride, l, a movq m1, [aq] -%if cpuflag(ssse3) pshufb m0, m1, [pb_0to5_2x7] pshufb m2, m1, [pb_2to6_3x7] -%else - punpckhbw m3, m1, m1 ; 44556677 - pand m0, m1, [pb_6xm1_2x0] ; 012345__ - pand m3, [pb_6x0_2xm1] ; ______77 - psrlq m2, m1, 16 ; 234567__ - por m0, m3 ; 01234577 - por m2, m3 ; 23456777 -%endif psrlq m1, 8 LOWPASS 0, 1, 2, 3 @@ -810,12 +748,6 @@ cglobal vp9_ipred_dl_4x4, 4, 4, 0, dst, stride, l, a movd [dstq+strideq*0], m0 movd [dstq+strideq*2], m1 RET -%endmacro - -INIT_MMX mmxext -DL_MMX_FUNCS -INIT_MMX ssse3 -DL_MMX_FUNCS %macro DL_XMM_FUNCS 0 cglobal vp9_ipred_dl_8x8, 4, 4, 4, dst, stride, stride5, a @@ -964,14 +896,14 @@ DL_XMM_FUNCS ; dr -%macro DR_MMX_FUNCS 0 +INIT_MMX ssse3 cglobal vp9_ipred_dr_4x4, 4, 4, 0, dst, stride, l, a movd m0, [lq] punpckldq m0, [aq-1] movd m1, [aq+3] DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] - PALIGNR m1, m0, 1, m3 + palignr m1, m0, 1 psrlq m2, m1, 8 LOWPASS 0, 1, 2, 3 @@ -983,12 +915,6 @@ cglobal vp9_ipred_dr_4x4, 4, 4, 0, dst, stride, l, a psrlq m0, 8 movd [dstq+strideq*0], m0 RET -%endmacro - -INIT_MMX mmxext -DR_MMX_FUNCS -INIT_MMX ssse3 -DR_MMX_FUNCS %macro DR_XMM_FUNCS 0 cglobal vp9_ipred_dr_8x8, 4, 4, 4, dst, stride, l, a @@ -1266,7 +1192,7 @@ VL_XMM_FUNCS ; vr -%macro VR_MMX_FUNCS 0 +INIT_MMX ssse3 cglobal vp9_ipred_vr_4x4, 4, 4, 0, dst, stride, l, a movq m1, [aq-1] punpckldq m2, [lq] @@ -1274,7 +1200,7 @@ cglobal vp9_ipred_vr_4x4, 4, 4, 0, dst, stride, l, a DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] pavgb m0, m1 - PALIGNR m1, m2, 5, m3 + palignr m1, m2, 5 psrlq m2, m1, 8 psllq m3, m1, 8 LOWPASS 2, 1, 3, 4 @@ -1284,7 +1210,6 @@ cglobal vp9_ipred_vr_4x4, 4, 4, 0, dst, stride, l, a ; IABC | m0 contains ABCDxxxx ; JEFG | m2 contains xJIEFGHx -%if cpuflag(ssse3) punpckldq m0, m2 pshufb m2, [pb_13456_3xm1] movd [dstq+strideq*0], m0 @@ -1293,24 +1218,7 @@ cglobal vp9_ipred_vr_4x4, 4, 4, 0, dst, stride, l, a psrlq m2, 8 movd [dstq+strideq*2], m0 movd [dstq+strideq*1], m2 -%else - psllq m1, m2, 40 - psrlq m2, 24 - movd [dstq+strideq*0], m0 - movd [dstq+strideq*1], m2 - PALIGNR m0, m1, 7, m3 - psllq m1, 8 - PALIGNR m2, m1, 7, m3 - movd [dstq+strideq*2], m0 - movd [dstq+stride3q ], m2 -%endif RET -%endmacro - -INIT_MMX mmxext -VR_MMX_FUNCS -INIT_MMX ssse3 -VR_MMX_FUNCS %macro VR_XMM_FUNCS 1 ; n_xmm_regs for 16x16 cglobal vp9_ipred_vr_8x8, 4, 4, 5, dst, stride, l, a @@ -1688,16 +1596,10 @@ HD_XMM_FUNCS INIT_XMM avx HD_XMM_FUNCS -%macro HU_MMX_FUNCS 0 +INIT_MMX ssse3 cglobal vp9_ipred_hu_4x4, 3, 3, 0, dst, stride, l movd m0, [lq] -%if cpuflag(ssse3) pshufb m0, [pb_0to2_5x3] -%else - punpcklbw m1, m0, m0 ; 00112233 - pshufw m1, m1, q3333 ; 33333333 - punpckldq m0, m1 ; 01233333 -%endif psrlq m1, m0, 8 psrlq m2, m1, 8 LOWPASS 2, 1, 0, 3 @@ -1705,7 +1607,7 @@ cglobal vp9_ipred_hu_4x4, 3, 3, 0, dst, stride, l DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] SBUTTERFLY bw, 1, 2, 0 - PALIGNR m2, m1, 2, m0 + palignr m2, m1, 2 movd [dstq+strideq*0], m1 movd [dstq+strideq*1], m2 punpckhdq m1, m1 @@ -1713,12 +1615,6 @@ cglobal vp9_ipred_hu_4x4, 3, 3, 0, dst, stride, l movd [dstq+strideq*2], m1 movd [dstq+stride3q ], m2 RET -%endmacro - -INIT_MMX mmxext -HU_MMX_FUNCS -INIT_MMX ssse3 -HU_MMX_FUNCS %macro HU_XMM_FUNCS 1 ; n_xmm_regs in hu_32x32 cglobal vp9_ipred_hu_8x8, 3, 4, 4, dst, stride, l _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
