This is an automated email from the git hooks/post-receive script.
Git pushed a commit to branch master
in repository ffmpeg.
The following commit(s) were added to refs/heads/master by this push:
new f0bfebc9ad avcodec/x86/hevc/add_res: Port ff_hevc_add_residual_4_8_mmxext to SSE2
f0bfebc9ad is described below
commit f0bfebc9adce90d7c3d44f1a081a705389995b34
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Fri Jun 26 19:42:44 2026 +0200
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Tue Jun 30 22:50:17 2026 +0200
avcodec/x86/hevc/add_res: Port ff_hevc_add_residual_4_8_mmxext to SSE2
No change in performance here.
Signed-off-by: Andreas Rheinhardt <[email protected]>
---
libavcodec/x86/hevc/add_res.asm | 17 ++++++++---------
libavcodec/x86/hevc/dsp.h | 2 +-
libavcodec/x86/hevc/dsp_init.c | 5 ++---
3 files changed, 11 insertions(+), 13 deletions(-)
diff --git a/libavcodec/x86/hevc/add_res.asm b/libavcodec/x86/hevc/add_res.asm
index 3489e04e2b..70ae9fbc76 100644
--- a/libavcodec/x86/hevc/add_res.asm
+++ b/libavcodec/x86/hevc/add_res.asm
@@ -27,9 +27,9 @@ cextern pw_1023
%define max_pixels_10 pw_1023
; the add_res macros and functions were largely inspired by h264_idct.asm from the x264 project
-%macro ADD_RES_MMX_4_8 1
- mova m0, [r1+%1]
- mova m2, [r1+%1+8]
+%macro ADD_RES_4_8 1
+ movq m0, [r1+%1]
+ movq m2, [r1+%1+8]
movd m1, [r0]
movd m3, [r0+r2]
@@ -45,14 +45,13 @@ cextern pw_1023
movd [r0+r2], m2
%endmacro
-
-INIT_MMX mmxext
-; void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
-cglobal hevc_add_residual_4_8, 3, 3, 6
+INIT_XMM sse2
+; void ff_hevc_add_residual_4_8_sse2(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
+cglobal hevc_add_residual_4_8, 3, 3, 5
pxor m4, m4
- ADD_RES_MMX_4_8 0
+ ADD_RES_4_8 0
lea r0, [r0+r2*2]
- ADD_RES_MMX_4_8 16
+ ADD_RES_4_8 16
RET
%macro ADD_RES_SSE_8_8 1
diff --git a/libavcodec/x86/hevc/dsp.h b/libavcodec/x86/hevc/dsp.h
index 69d3987cdb..98dc8cff9a 100644
--- a/libavcodec/x86/hevc/dsp.h
+++ b/libavcodec/x86/hevc/dsp.h
@@ -167,7 +167,7 @@ void ff_hevc_put_qpel_hv8_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff
// TRANSFORM_ADD
///////////////////////////////////////////////////////////////////////////////
-void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
+void ff_hevc_add_residual_4_8_sse2(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_8_8_sse2(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_16_8_sse2(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_32_8_sse2(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c
index ca3962b3f2..00f24aed41 100644
--- a/libavcodec/x86/hevc/dsp_init.c
+++ b/libavcodec/x86/hevc/dsp_init.c
@@ -817,10 +817,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
int cpu_flags = av_get_cpu_flags();
if (bit_depth == 8) {
- if (EXTERNAL_MMXEXT(cpu_flags)) {
- c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext;
- }
if (EXTERNAL_SSE2(cpu_flags)) {
+ c->add_residual[0] = ff_hevc_add_residual_4_8_sse2;
+
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
#if ARCH_X86_64
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]