This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit ea37f49aedc924ee424f666174cf7fc6e9b3162d
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Sun Nov 30 20:26:44 2025 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Mon Dec 8 19:27:44 2025 +0100

    avcodec/vp9intrapred: Remove MMXEXT functions overridden by SSSE3
    
    SSSE3 is already quite old (introduced in 2006 for Intel, 2011 for AMD),
    so the overwhelming majority of our users (particularly those
    that actually update their FFmpeg) will be using the SSSE3 versions.
    This commit therefore removes the MMXEXT functions that the SSSE3
    versions override (and which don't abide by the ABI) to get closer
    to a removal of emms_c.
    
    Reviewed-by: Ronald S. Bultje <[email protected]>
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vp9dsp_init.c    |  12 ++--
 libavcodec/x86/vp9intrapred.asm | 122 +++-------------------------------------
 2 files changed, 13 insertions(+), 121 deletions(-)

diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 25a007008b..85332da2b9 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -154,6 +154,8 @@ lpf_funcs(88, 16, avx);
 void ff_vp9_ipred_##type##_##size##x##size##_##opt(uint8_t *dst, ptrdiff_t stride, \
                                                    const uint8_t *l, const uint8_t *a)
 
+ipred_func(4, hd, mmxext);
+ipred_func(4, vl, mmxext);
 ipred_func(8, v, mmx);
 
 #define ipred_dc_funcs(size, opt) \
@@ -161,9 +163,6 @@ ipred_func(size, dc, opt); \
 ipred_func(size, dc_left, opt); \
 ipred_func(size, dc_top, opt)
 
-ipred_dc_funcs(4, mmxext);
-ipred_dc_funcs(8, mmxext);
-
 #define ipred_dir_tm_funcs(size, opt) \
 ipred_func(size, tm, opt); \
 ipred_func(size, dl, opt); \
@@ -173,8 +172,6 @@ ipred_func(size, hu, opt); \
 ipred_func(size, vl, opt); \
 ipred_func(size, vr, opt)
 
-ipred_dir_tm_funcs(4, mmxext);
-
 ipred_func(16, v, sse);
 ipred_func(32, v, sse);
 
@@ -288,9 +285,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
         init_fpel_func(4, 1,  4, avg, _8, mmxext);
         init_fpel_func(3, 1,  8, avg, _8, mmxext);
         dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext;
-        init_dc_ipred(4, mmxext);
-        init_dc_ipred(8, mmxext);
-        init_dir_tm_ipred(4, mmxext);
+        dsp->intra_pred[TX_4X4][HOR_DOWN_PRED] = ff_vp9_ipred_hd_4x4_mmxext;
+        dsp->intra_pred[TX_4X4][VERT_LEFT_PRED] = ff_vp9_ipred_vl_4x4_mmxext;
     }
 
     if (EXTERNAL_SSE(cpu_flags)) {
diff --git a/libavcodec/x86/vp9intrapred.asm b/libavcodec/x86/vp9intrapred.asm
index b67addd7e3..22390ca831 100644
--- a/libavcodec/x86/vp9intrapred.asm
+++ b/libavcodec/x86/vp9intrapred.asm
@@ -93,21 +93,14 @@ SECTION .text
 
 ; dc_NxN(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
 
-%macro DC_4to8_FUNCS 0
+INIT_MMX ssse3
 cglobal vp9_ipred_dc_4x4, 4, 4, 0, dst, stride, l, a
     movd                    m0, [lq]
     punpckldq               m0, [aq]
     pxor                    m1, m1
     psadbw                  m0, m1
-%if cpuflag(ssse3)
     pmulhrsw                m0, [pw_4096]
     pshufb                  m0, m1
-%else
-    paddw                   m0, [pw_4]
-    psraw                   m0, 3
-    punpcklbw               m0, m0
-    pshufw                  m0, m0, q0000
-%endif
     movd      [dstq+strideq*0], m0
     movd      [dstq+strideq*1], m0
     lea                   dstq, [dstq+strideq*2]
@@ -124,15 +117,8 @@ cglobal vp9_ipred_dc_8x8, 4, 4, 0, dst, stride, l, a
     psadbw                  m0, m2
     psadbw                  m1, m2
     paddw                   m0, m1
-%if cpuflag(ssse3)
     pmulhrsw                m0, [pw_2048]
     pshufb                  m0, m2
-%else
-    paddw                   m0, [pw_8]
-    psraw                   m0, 4
-    punpcklbw               m0, m0
-    pshufw                  m0, m0, q0000
-%endif
     movq      [dstq+strideq*0], m0
     movq      [dstq+strideq*1], m0
     movq      [dstq+strideq*2], m0
@@ -143,12 +129,7 @@ cglobal vp9_ipred_dc_8x8, 4, 4, 0, dst, stride, l, a
     movq      [dstq+strideq*2], m0
     movq      [dstq+stride3q ], m0
     RET
-%endmacro
 
-INIT_MMX mmxext
-DC_4to8_FUNCS
-INIT_MMX ssse3
-DC_4to8_FUNCS
 
 %macro DC_16to32_FUNCS 0
 cglobal vp9_ipred_dc_16x16, 4, 4, 3, dst, stride, l, a
@@ -238,15 +219,8 @@ cglobal vp9_ipred_dc_%1_4x4, 4, 4, 0, dst, stride, l, a
     movd                    m0, [%2q]
     pxor                    m1, m1
     psadbw                  m0, m1
-%if cpuflag(ssse3)
     pmulhrsw                m0, [pw_8192]
     pshufb                  m0, m1
-%else
-    paddw                   m0, [pw_2]
-    psraw                   m0, 2
-    punpcklbw               m0, m0
-    pshufw                  m0, m0, q0000
-%endif
     movd      [dstq+strideq*0], m0
     movd      [dstq+strideq*1], m0
     lea                   dstq, [dstq+strideq*2]
@@ -260,15 +234,8 @@ cglobal vp9_ipred_dc_%1_8x8, 4, 4, 0, dst, stride, l, a
     lea               stride3q, [strideq*3]
     pxor                    m1, m1
     psadbw                  m0, m1
-%if cpuflag(ssse3)
     pmulhrsw                m0, [pw_4096]
     pshufb                  m0, m1
-%else
-    paddw                   m0, [pw_4]
-    psraw                   m0, 3
-    punpcklbw               m0, m0
-    pshufw                  m0, m0, q0000
-%endif
     movq      [dstq+strideq*0], m0
     movq      [dstq+strideq*1], m0
     movq      [dstq+strideq*2], m0
@@ -281,9 +248,6 @@ cglobal vp9_ipred_dc_%1_8x8, 4, 4, 0, dst, stride, l, a
     RET
 %endmacro
 
-INIT_MMX mmxext
-DC_1D_4to8_FUNCS top,  a
-DC_1D_4to8_FUNCS left, l
 INIT_MMX ssse3
 DC_1D_4to8_FUNCS top,  a
 DC_1D_4to8_FUNCS left, l
@@ -548,33 +512,22 @@ H_XMM_FUNCS 4, 8
 INIT_XMM avx
 H_XMM_FUNCS 4, 8
 
-%macro TM_MMX_FUNCS 0
+INIT_MMX ssse3
 cglobal vp9_ipred_tm_4x4, 4, 4, 0, dst, stride, l, a
     pxor                    m1, m1
     movd                    m0, [aq]
     pinsrw                  m2, [aq-1], 0
     punpcklbw               m0, m1
     DEFINE_ARGS dst, stride, l, cnt
-%if cpuflag(ssse3)
     mova                    m3, [pw_m256]
     mova                    m1, [pw_m255]
     pshufb                  m2, m3
-%else
-    punpcklbw               m2, m1
-    pshufw                  m2, m2, q0000
-%endif
     psubw                   m0, m2
     mov                   cntq, 1
 .loop:
     pinsrw                  m2, [lq+cntq*2], 0
-%if cpuflag(ssse3)
     pshufb                  m4, m2, m1
     pshufb                  m2, m3
-%else
-    punpcklbw               m2, m1
-    pshufw                  m4, m2, q1111
-    pshufw                  m2, m2, q0000
-%endif
     paddw                   m4, m0
     paddw                   m2, m0
     packuswb                m4, m4
@@ -585,12 +538,6 @@ cglobal vp9_ipred_tm_4x4, 4, 4, 0, dst, stride, l, a
     dec                   cntq
     jge .loop
     RET
-%endmacro
-
-INIT_MMX mmxext
-TM_MMX_FUNCS
-INIT_MMX ssse3
-TM_MMX_FUNCS
 
 %macro TM_XMM_FUNCS 0
 cglobal vp9_ipred_tm_8x8, 4, 4, 5, dst, stride, l, a
@@ -784,20 +731,11 @@ TM_XMM_FUNCS
     pavgb                  m%1, m%2
 %endmacro
 
-%macro DL_MMX_FUNCS 0
+INIT_MMX ssse3
 cglobal vp9_ipred_dl_4x4, 4, 4, 0, dst, stride, l, a
     movq                    m1, [aq]
-%if cpuflag(ssse3)
     pshufb                  m0, m1, [pb_0to5_2x7]
     pshufb                  m2, m1, [pb_2to6_3x7]
-%else
-    punpckhbw               m3, m1, m1              ; 44556677
-    pand                    m0, m1, [pb_6xm1_2x0]   ; 012345__
-    pand                    m3, [pb_6x0_2xm1]       ; ______77
-    psrlq                   m2, m1, 16              ; 234567__
-    por                     m0, m3                  ; 01234577
-    por                     m2, m3                  ; 23456777
-%endif
     psrlq                   m1, 8
     LOWPASS                  0, 1, 2, 3
 
@@ -810,12 +748,6 @@ cglobal vp9_ipred_dl_4x4, 4, 4, 0, dst, stride, l, a
     movd      [dstq+strideq*0], m0
     movd      [dstq+strideq*2], m1
     RET
-%endmacro
-
-INIT_MMX mmxext
-DL_MMX_FUNCS
-INIT_MMX ssse3
-DL_MMX_FUNCS
 
 %macro DL_XMM_FUNCS 0
 cglobal vp9_ipred_dl_8x8, 4, 4, 4, dst, stride, stride5, a
@@ -964,14 +896,14 @@ DL_XMM_FUNCS
 
 ; dr
 
-%macro DR_MMX_FUNCS 0
+INIT_MMX ssse3
 cglobal vp9_ipred_dr_4x4, 4, 4, 0, dst, stride, l, a
     movd                    m0, [lq]
     punpckldq               m0, [aq-1]
     movd                    m1, [aq+3]
     DEFINE_ARGS dst, stride, stride3
     lea               stride3q, [strideq*3]
-    PALIGNR                 m1, m0, 1, m3
+    palignr                 m1, m0, 1
     psrlq                   m2, m1, 8
     LOWPASS                  0, 1, 2, 3
 
@@ -983,12 +915,6 @@ cglobal vp9_ipred_dr_4x4, 4, 4, 0, dst, stride, l, a
     psrlq                   m0, 8
     movd      [dstq+strideq*0], m0
     RET
-%endmacro
-
-INIT_MMX mmxext
-DR_MMX_FUNCS
-INIT_MMX ssse3
-DR_MMX_FUNCS
 
 %macro DR_XMM_FUNCS 0
 cglobal vp9_ipred_dr_8x8, 4, 4, 4, dst, stride, l, a
@@ -1266,7 +1192,7 @@ VL_XMM_FUNCS
 
 ; vr
 
-%macro VR_MMX_FUNCS 0
+INIT_MMX ssse3
 cglobal vp9_ipred_vr_4x4, 4, 4, 0, dst, stride, l, a
     movq                    m1, [aq-1]
     punpckldq               m2, [lq]
@@ -1274,7 +1200,7 @@ cglobal vp9_ipred_vr_4x4, 4, 4, 0, dst, stride, l, a
     DEFINE_ARGS dst, stride, stride3
     lea               stride3q, [strideq*3]
     pavgb                   m0, m1
-    PALIGNR                 m1, m2, 5, m3
+    palignr                 m1, m2, 5
     psrlq                   m2, m1, 8
     psllq                   m3, m1, 8
     LOWPASS                  2,  1, 3, 4
@@ -1284,7 +1210,6 @@ cglobal vp9_ipred_vr_4x4, 4, 4, 0, dst, stride, l, a
     ; IABC  | m0 contains ABCDxxxx
     ; JEFG  | m2 contains xJIEFGHx
 
-%if cpuflag(ssse3)
     punpckldq               m0, m2
     pshufb                  m2, [pb_13456_3xm1]
     movd      [dstq+strideq*0], m0
@@ -1293,24 +1218,7 @@ cglobal vp9_ipred_vr_4x4, 4, 4, 0, dst, stride, l, a
     psrlq                   m2, 8
     movd      [dstq+strideq*2], m0
     movd      [dstq+strideq*1], m2
-%else
-    psllq                   m1, m2, 40
-    psrlq                   m2, 24
-    movd      [dstq+strideq*0], m0
-    movd      [dstq+strideq*1], m2
-    PALIGNR                 m0, m1, 7, m3
-    psllq                   m1, 8
-    PALIGNR                 m2, m1, 7, m3
-    movd      [dstq+strideq*2], m0
-    movd      [dstq+stride3q ], m2
-%endif
     RET
-%endmacro
-
-INIT_MMX mmxext
-VR_MMX_FUNCS
-INIT_MMX ssse3
-VR_MMX_FUNCS
 
 %macro VR_XMM_FUNCS 1 ; n_xmm_regs for 16x16
 cglobal vp9_ipred_vr_8x8, 4, 4, 5, dst, stride, l, a
@@ -1688,16 +1596,10 @@ HD_XMM_FUNCS
 INIT_XMM avx
 HD_XMM_FUNCS
 
-%macro HU_MMX_FUNCS 0
+INIT_MMX ssse3
 cglobal vp9_ipred_hu_4x4, 3, 3, 0, dst, stride, l
     movd                    m0, [lq]
-%if cpuflag(ssse3)
     pshufb                  m0, [pb_0to2_5x3]
-%else
-    punpcklbw               m1, m0, m0          ; 00112233
-    pshufw                  m1, m1, q3333       ; 33333333
-    punpckldq               m0, m1              ; 01233333
-%endif
     psrlq                   m1, m0, 8
     psrlq                   m2, m1, 8
     LOWPASS                  2,  1, 0, 3
@@ -1705,7 +1607,7 @@ cglobal vp9_ipred_hu_4x4, 3, 3, 0, dst, stride, l
     DEFINE_ARGS dst, stride, stride3
     lea               stride3q, [strideq*3]
     SBUTTERFLY              bw,  1, 2, 0
-    PALIGNR                 m2, m1, 2, m0
+    palignr                 m2, m1, 2
     movd      [dstq+strideq*0], m1
     movd      [dstq+strideq*1], m2
     punpckhdq               m1, m1
@@ -1713,12 +1615,6 @@ cglobal vp9_ipred_hu_4x4, 3, 3, 0, dst, stride, l
     movd      [dstq+strideq*2], m1
     movd      [dstq+stride3q ], m2
     RET
-%endmacro
-
-INIT_MMX mmxext
-HU_MMX_FUNCS
-INIT_MMX ssse3
-HU_MMX_FUNCS
 
 %macro HU_XMM_FUNCS 1 ; n_xmm_regs in hu_32x32
 cglobal vp9_ipred_hu_8x8, 3, 4, 4, dst, stride, l

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to