vp8dsp: Don't use MMX registers in put_vp8_pixels8

Andreas Rheinhardt via ffmpeg-cvslog Thu, 04 Dec 2025 06:41:21 -0800

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit 8fb6b0c73318718c2b18056aaecc91fabc7dafb9
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Sun Nov 23 11:25:26 2025 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Thu Dec 4 15:17:36 2025 +0100

    avcodec/x86/vp8dsp: Don't use MMX registers in put_vp8_pixels8
    
    Use GPRs on x64 and xmm registers else (using GPRs reduces codesize).
    This avoids clobbering the floating point state and therefore no longer
    breaks the ABI.
    No change in benchmarks here.
    
    Reviewed-by: Ronald S. Bultje <[email protected]>
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vp8dsp.asm    | 20 ++++++++++++++------
 libavcodec/x86/vp8dsp_init.c |  9 +++------
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 7b836351e4..7dee979e20 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -676,14 +676,22 @@ FILTER_BILINEAR 4
 INIT_XMM ssse3
 FILTER_BILINEAR 8
 
-INIT_MMX mmx
-cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height
+INIT_XMM sse2
+cglobal put_vp8_pixels8, 5, 5+2*ARCH_X86_64, 2, dst, dststride, src, srcstride, height
 .nextrow:
-    movq    mm0, [srcq+srcstrideq*0]
-    movq    mm1, [srcq+srcstrideq*1]
+%if ARCH_X86_64
+    mov     r5q, [srcq+srcstrideq*0]
+    mov     r6q, [srcq+srcstrideq*1]
     lea    srcq, [srcq+srcstrideq*2]
-    movq [dstq+dststrideq*0], mm0
-    movq [dstq+dststrideq*1], mm1
+    mov [dstq+dststrideq*0], r5q
+    mov [dstq+dststrideq*1], r6q
+%else
+    movq     m0, [srcq+srcstrideq*0]
+    movq     m1, [srcq+srcstrideq*1]
+    lea    srcq, [srcq+srcstrideq*2]
+    movq [dstq+dststrideq*0], m0
+    movq [dstq+dststrideq*1], m1
+%endif
     lea    dstq, [dstq+dststrideq*2]
     sub heightd, 2
     jg .nextrow
diff --git a/libavcodec/x86/vp8dsp_init.c b/libavcodec/x86/vp8dsp_init.c
index 00733a2564..40aa52c7f0 100644
--- a/libavcodec/x86/vp8dsp_init.c
+++ b/libavcodec/x86/vp8dsp_init.c
@@ -88,7 +88,7 @@ void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, ptrdiff_t dststride,
                                    int height, int mx, int my);
 
 
-void ff_put_vp8_pixels8_mmx (uint8_t *dst, ptrdiff_t dststride,
+void ff_put_vp8_pixels8_sse2(uint8_t *dst, ptrdiff_t dststride,
                              const uint8_t *src, ptrdiff_t srcstride,
                              int height, int mx, int my);
 void ff_put_vp8_pixels16_sse(uint8_t *dst, ptrdiff_t dststride,
@@ -252,17 +252,14 @@ av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (EXTERNAL_MMX(cpu_flags)) {
-        c->put_vp8_epel_pixels_tab[1][0][0]     =
-        c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
-    }
-
     if (EXTERNAL_SSE(cpu_flags)) {
         c->put_vp8_epel_pixels_tab[0][0][0]     =
         c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
     }
 
     if (EXTERNAL_SSE2_SLOW(cpu_flags)) {
+        c->put_vp8_epel_pixels_tab[1][0][0]     =
+        c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_sse2;
         VP8_LUMA_MC_FUNC(0, 16, sse2);
         VP8_MC_FUNC(1, 8, sse2);
         VP8_BILINEAR_MC_FUNC(0, 16, sse2);

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 02/15: avcodec/x86/vp8dsp: Don't use MMX registers in put_vp8_pixels8

Reply via email to