This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 8fb6b0c73318718c2b18056aaecc91fabc7dafb9
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Sun Nov 23 11:25:26 2025 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Thu Dec 4 15:17:36 2025 +0100

    avcodec/x86/vp8dsp: Don't use MMX registers in put_vp8_pixels8

    Use GPRs on x64 and xmm registers else (using GPRs reduces codesize).
    This avoids clobbering the floating point state and therefore no longer
    breaks the ABI. No change in benchmarks here.

    Reviewed-by: Ronald S. Bultje <[email protected]>
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vp8dsp.asm    | 20 ++++++++++++++------
 libavcodec/x86/vp8dsp_init.c |  9 +++------
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 7b836351e4..7dee979e20 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -676,14 +676,22 @@ FILTER_BILINEAR 4
 INIT_XMM ssse3
 FILTER_BILINEAR 8
 
-INIT_MMX mmx
-cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height
+INIT_XMM sse2
+cglobal put_vp8_pixels8, 5, 5+2*ARCH_X86_64, 2, dst, dststride, src, srcstride, height
 .nextrow:
-    movq mm0, [srcq+srcstrideq*0]
-    movq mm1, [srcq+srcstrideq*1]
+%if ARCH_X86_64
+    mov r5q, [srcq+srcstrideq*0]
+    mov r6q, [srcq+srcstrideq*1]
     lea srcq, [srcq+srcstrideq*2]
-    movq [dstq+dststrideq*0], mm0
-    movq [dstq+dststrideq*1], mm1
+    mov [dstq+dststrideq*0], r5q
+    mov [dstq+dststrideq*1], r6q
+%else
+    movq m0, [srcq+srcstrideq*0]
+    movq m1, [srcq+srcstrideq*1]
+    lea srcq, [srcq+srcstrideq*2]
+    movq [dstq+dststrideq*0], m0
+    movq [dstq+dststrideq*1], m1
+%endif
     lea dstq, [dstq+dststrideq*2]
     sub heightd, 2
     jg .nextrow
diff --git a/libavcodec/x86/vp8dsp_init.c b/libavcodec/x86/vp8dsp_init.c
index 00733a2564..40aa52c7f0 100644
--- a/libavcodec/x86/vp8dsp_init.c
+++ b/libavcodec/x86/vp8dsp_init.c
@@ -88,7 +88,7 @@ void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, ptrdiff_t dststride,
                                    int height, int mx, int my);
 
 
-void ff_put_vp8_pixels8_mmx (uint8_t *dst, ptrdiff_t dststride,
+void ff_put_vp8_pixels8_sse2(uint8_t *dst, ptrdiff_t dststride,
                              const uint8_t *src, ptrdiff_t srcstride,
                              int height, int mx, int my);
 void ff_put_vp8_pixels16_sse(uint8_t *dst, ptrdiff_t dststride,
@@ -252,17 +252,14 @@ av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (EXTERNAL_MMX(cpu_flags)) {
-        c->put_vp8_epel_pixels_tab[1][0][0] =
-        c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
-    }
-
     if (EXTERNAL_SSE(cpu_flags)) {
         c->put_vp8_epel_pixels_tab[0][0][0] =
         c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
     }
 
     if (EXTERNAL_SSE2_SLOW(cpu_flags)) {
+        c->put_vp8_epel_pixels_tab[1][0][0] =
+        c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_sse2;
         VP8_LUMA_MC_FUNC(0, 16, sse2);
         VP8_MC_FUNC(1, 8, sse2);
         VP8_BILINEAR_MC_FUNC(0, 16, sse2);
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]
