[FFmpeg-cvslog] avcodec/x86/ac3dsp_init: Remove obsolete 3dnow, MMX(EXT), SSE functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Tue Jun  7 00:55:25 2022 +0200| [fd98594a8831ce037a495b6d7e090bd8f81e83a1] | 
committer: Andreas Rheinhardt

avcodec/x86/ac3dsp_init: Remove obsolete 3dnow, MMX(EXT), SSE functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fd98594a8831ce037a495b6d7e090bd8f81e83a1
---

 libavcodec/x86/ac3dsp.asm| 61 
 libavcodec/x86/ac3dsp_init.c | 18 -
 2 files changed, 79 deletions(-)

diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 4ddaa94320..c11a94ca93 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -63,14 +63,7 @@ cglobal ac3_exponent_min, 3, 4, 2, exp, reuse_blks, expn, 
offset
 REP_RET
 %endmacro
 
-%define LOOP_ALIGN
-INIT_MMX mmx
-AC3_EXPONENT_MIN
-%if HAVE_MMXEXT_EXTERNAL
 %define LOOP_ALIGN ALIGN 16
-INIT_MMX mmxext
-AC3_EXPONENT_MIN
-%endif
 %if HAVE_SSE2_EXTERNAL
 INIT_XMM sse2
 AC3_EXPONENT_MIN
@@ -81,60 +74,6 @@ AC3_EXPONENT_MIN
 ; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len)
 ;-
 
-; The 3DNow! version is not bit-identical because pf2id uses truncation rather
-; than round-to-nearest.
-INIT_MMX 3dnow
-cglobal float_to_fixed24, 3, 3, 0, dst, src, len
-movq   m0, [pf_1_24]
-.loop:
-movq   m1, [srcq   ]
-movq   m2, [srcq+8 ]
-movq   m3, [srcq+16]
-movq   m4, [srcq+24]
-pfmul  m1, m0
-pfmul  m2, m0
-pfmul  m3, m0
-pfmul  m4, m0
-pf2id  m1, m1
-pf2id  m2, m2
-pf2id  m3, m3
-pf2id  m4, m4
-movq  [dstq   ], m1
-movq  [dstq+8 ], m2
-movq  [dstq+16], m3
-movq  [dstq+24], m4
-add  srcq, 32
-add  dstq, 32
-sub  lend, 8
-ja .loop
-femms
-RET
-
-INIT_XMM sse
-cglobal float_to_fixed24, 3, 3, 3, dst, src, len
-movaps m0, [pf_1_24]
-.loop:
-movaps m1, [srcq   ]
-movaps m2, [srcq+16]
-mulps  m1, m0
-mulps  m2, m0
-cvtps2pi  mm0, m1
-movhlpsm1, m1
-cvtps2pi  mm1, m1
-cvtps2pi  mm2, m2
-movhlpsm2, m2
-cvtps2pi  mm3, m2
-movq  [dstq   ], mm0
-movq  [dstq+ 8], mm1
-movq  [dstq+16], mm2
-movq  [dstq+24], mm3
-add  srcq, 32
-add  dstq, 32
-sub  lend, 8
-ja .loop
-emms
-RET
-
 INIT_XMM sse2
 cglobal float_to_fixed24, 3, 3, 9, dst, src, len
 movaps m0, [pf_1_24]
diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c
index 5f20e6dc31..75a341bc95 100644
--- a/libavcodec/x86/ac3dsp_init.c
+++ b/libavcodec/x86/ac3dsp_init.c
@@ -24,12 +24,8 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/ac3dsp.h"
 
-void ff_ac3_exponent_min_mmx   (uint8_t *exp, int num_reuse_blocks, int 
nb_coefs);
-void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int 
nb_coefs);
 void ff_ac3_exponent_min_sse2  (uint8_t *exp, int num_reuse_blocks, int 
nb_coefs);
 
-void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned int 
len);
-void ff_float_to_fixed24_sse  (int32_t *dst, const float *src, unsigned int 
len);
 void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int 
len);
 
 int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
@@ -41,20 +37,6 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int 
bit_exact)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_MMX(cpu_flags)) {
-c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
-}
-if (EXTERNAL_AMD3DNOW(cpu_flags)) {
-if (!bit_exact) {
-c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
-}
-}
-if (EXTERNAL_MMXEXT(cpu_flags)) {
-c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
-}
-if (EXTERNAL_SSE(cpu_flags)) {
-c->float_to_fixed24 = ff_float_to_fixed24_sse;
-}
 if (EXTERNAL_SSE2(cpu_flags)) {
 c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
 c->float_to_fixed24 = ff_float_to_fixed24_sse2;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/vc1dsp_init: Remove obsolete 3dnow, MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Tue Jun  7 00:40:04 2022 +0200| [e02ffed004caf192c6307813e7b178c3044993c6] | 
committer: Andreas Rheinhardt

avcodec/x86/vc1dsp_init: Remove obsolete 3dnow, MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e02ffed004caf192c6307813e7b178c3044993c6
---

 libavcodec/x86/h264_chromamc.asm |  1 -
 libavcodec/x86/vc1dsp_init.c | 40 +---
 libavcodec/x86/vc1dsp_loopfilter.asm | 36 
 3 files changed, 18 insertions(+), 59 deletions(-)

diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index b5a78b537d..c7c4a2d4aa 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -448,7 +448,6 @@ chroma_mc2_mmx_func avg, h264
 
 INIT_MMX 3dnow
 chroma_mc8_mmx_func avg, h264, _rnd
-chroma_mc8_mmx_func avg, vc1,  _nornd
 chroma_mc8_mmx_func avg, rv40
 chroma_mc4_mmx_func avg, h264
 chroma_mc4_mmx_func avg, rv40
diff --git a/libavcodec/x86/vc1dsp_init.c b/libavcodec/x86/vc1dsp_init.c
index 2fbf0b3a74..b1f68b098c 100644
--- a/libavcodec/x86/vc1dsp_init.c
+++ b/libavcodec/x86/vc1dsp_init.c
@@ -33,9 +33,10 @@
 #include "vc1dsp.h"
 #include "config.h"
 
-#define LOOP_FILTER(EXT) \
+#define LOOP_FILTER4(EXT) \
 void ff_vc1_v_loop_filter4_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \
-void ff_vc1_h_loop_filter4_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \
+void ff_vc1_h_loop_filter4_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq);
+#define LOOP_FILTER816(EXT) \
 void ff_vc1_v_loop_filter8_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \
 void ff_vc1_h_loop_filter8_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \
 \
@@ -52,9 +53,10 @@ static void vc1_h_loop_filter16_ ## EXT(uint8_t *src, 
ptrdiff_t stride, int pq)
 }
 
 #if HAVE_X86ASM
-LOOP_FILTER(mmxext)
-LOOP_FILTER(sse2)
-LOOP_FILTER(ssse3)
+LOOP_FILTER4(mmxext)
+LOOP_FILTER816(sse2)
+LOOP_FILTER4(ssse3)
+LOOP_FILTER816(ssse3)
 
 void ff_vc1_h_loop_filter8_sse4(uint8_t *src, ptrdiff_t stride, int pq);
 
@@ -72,11 +74,7 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, ptrdiff_t 
stride, int pq)
 }
 
 DECLARE_FUNCTION(put_,  8, _mmx)
-DECLARE_FUNCTION(put_, 16, _mmx)
-DECLARE_FUNCTION(avg_,  8, _mmx)
-DECLARE_FUNCTION(avg_, 16, _mmx)
 DECLARE_FUNCTION(avg_,  8, _mmxext)
-DECLARE_FUNCTION(avg_, 16, _mmxext)
 DECLARE_FUNCTION(put_, 16, _sse2)
 DECLARE_FUNCTION(avg_, 16, _sse2)
 
@@ -86,8 +84,6 @@ void ff_put_vc1_chroma_mc8_nornd_mmx  (uint8_t *dst, uint8_t 
*src,
ptrdiff_t stride, int h, int x, int y);
 void ff_avg_vc1_chroma_mc8_nornd_mmxext(uint8_t *dst, uint8_t *src,
 ptrdiff_t stride, int h, int x, int y);
-void ff_avg_vc1_chroma_mc8_nornd_3dnow(uint8_t *dst, uint8_t *src,
-   ptrdiff_t stride, int h, int x, int y);
 void ff_put_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
ptrdiff_t stride, int h, int x, int y);
 void ff_avg_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
@@ -114,9 +110,10 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
 if (EXTERNAL_MMXEXT(cpu_flags))
 ff_vc1dsp_init_mmxext(dsp);
 
-#define ASSIGN_LF(EXT) \
+#define ASSIGN_LF4(EXT) \
 dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT; \
-dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT; \
+dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT
+#define ASSIGN_LF816(EXT) \
 dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_ ## EXT; \
 dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_ ## EXT; \
 dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \
@@ -127,19 +124,12 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
 dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = 
ff_put_vc1_chroma_mc8_nornd_mmx;
 
 dsp->put_vc1_mspel_pixels_tab[1][0]  = put_vc1_mspel_mc00_8_mmx;
-dsp->put_vc1_mspel_pixels_tab[0][0]  = put_vc1_mspel_mc00_16_mmx;
-dsp->avg_vc1_mspel_pixels_tab[1][0]  = avg_vc1_mspel_mc00_8_mmx;
-dsp->avg_vc1_mspel_pixels_tab[0][0]  = avg_vc1_mspel_mc00_16_mmx;
-}
-if (EXTERNAL_AMD3DNOW(cpu_flags)) {
-dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = 
ff_avg_vc1_chroma_mc8_nornd_3dnow;
 }
 if (EXTERNAL_MMXEXT(cpu_flags)) {
-ASSIGN_LF(mmxext);
+ASSIGN_LF4(mmxext);
 dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = 
ff_avg_vc1_chroma_mc8_nornd_mmxext;
 
 dsp->avg_vc1_mspel_pixels_tab[1][0]

[FFmpeg-cvslog] avcodec/x86/diracdsp: Remove obsolete MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Tue Jun  7 01:37:59 2022 +0200| [d29a9c2aa68fc3eb6d61ff95c698e29316037583] | 
committer: Andreas Rheinhardt

avcodec/x86/diracdsp: Remove obsolete MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d29a9c2aa68fc3eb6d61ff95c698e29316037583
---

 libavcodec/x86/diracdsp.asm|  8 
 libavcodec/x86/diracdsp_init.c | 25 -
 2 files changed, 33 deletions(-)

diff --git a/libavcodec/x86/diracdsp.asm b/libavcodec/x86/diracdsp.asm
index 17145baf87..e5e2b11846 100644
--- a/libavcodec/x86/diracdsp.asm
+++ b/libavcodec/x86/diracdsp.asm
@@ -248,14 +248,6 @@ cglobal add_dirac_obmc%1_%2, 6,6,5, dst, src, stride, 
obmc, yblen
 %endm
 
 INIT_MMX
-%if ARCH_X86_64 == 0
-PUT_RECT mmx
-ADD_RECT mmx
-
-HPEL_FILTER mmx
-ADD_OBMC 32, mmx
-ADD_OBMC 16, mmx
-%endif
 ADD_OBMC 8, mmx
 
 INIT_XMM
diff --git a/libavcodec/x86/diracdsp_init.c b/libavcodec/x86/diracdsp_init.c
index 8baacf3129..f678759dc0 100644
--- a/libavcodec/x86/diracdsp_init.c
+++ b/libavcodec/x86/diracdsp_init.c
@@ -22,19 +22,14 @@
 #include "libavcodec/diracdsp.h"
 #include "fpel.h"
 
-void ff_add_rect_clamped_mmx(uint8_t *, const uint16_t *, int, const int16_t 
*, int, int, int);
 void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t 
*, int, int, int);
 
 void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride, 
const uint8_t *obmc_weight, int yblen);
-void ff_add_dirac_obmc16_mmx(uint16_t *dst, const uint8_t *src, int stride, 
const uint8_t *obmc_weight, int yblen);
-void ff_add_dirac_obmc32_mmx(uint16_t *dst, const uint8_t *src, int stride, 
const uint8_t *obmc_weight, int yblen);
 
 void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, 
const uint8_t *obmc_weight, int yblen);
 void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, 
const uint8_t *obmc_weight, int yblen);
 
-void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, 
int src_stride, int width, int height);
 void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t 
*src, int src_stride, int width, int height);
-void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const 
int16_t *src, int src_stride, int width, int height);
 void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const 
int16_t *src, int src_stride, int width, int height);
 void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const 
uint8_t *src, int src_stride, int width, int height);
 
@@ -87,16 +82,9 @@ static void OPNAME ## _dirac_pixels32_ ## EXT(uint8_t *dst, 
const uint8_t *src[5
 }\
 }
 
-DIRAC_PIXOP(put, mmx)
-DIRAC_PIXOP(avg, mmx)
-DIRAC_PIXOP(avg, mmxext)
-
 DIRAC_PIXOP(put, sse2)
 DIRAC_PIXOP(avg, sse2)
 
-#if !ARCH_X86_64
-HPEL_FILTER(8, mmx)
-#endif
 HPEL_FILTER(16, sse2)
 
 #endif // HAVE_X86ASM
@@ -108,19 +96,6 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
 
 if (EXTERNAL_MMX(mm_flags)) {
 c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
-#if !ARCH_X86_64
-c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx;
-c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx;
-c->dirac_hpel_filter = dirac_hpel_filter_mmx;
-c->add_rect_clamped = ff_add_rect_clamped_mmx;
-c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_mmx;
-#endif
-PIXFUNC(put, 0, mmx);
-PIXFUNC(avg, 0, mmx);
-}
-
-if (EXTERNAL_MMXEXT(mm_flags)) {
-PIXFUNC(avg, 0, mmxext);
 }
 
 if (EXTERNAL_SSE2(mm_flags)) {

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/audiodsp_init: Remove obsolete MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Tue Jun  7 01:22:52 2022 +0200| [3d716d38abdae1982e84e30becb57458244656bd] | 
committer: Andreas Rheinhardt

avcodec/x86/audiodsp_init: Remove obsolete MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3d716d38abdae1982e84e30becb57458244656bd
---

 libavcodec/x86/audiodsp.asm| 13 +
 libavcodec/x86/audiodsp_init.c | 10 --
 2 files changed, 1 insertion(+), 22 deletions(-)

diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index de395e5fa8..b604b0443c 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -23,8 +23,8 @@
 
 SECTION .text
 
-%macro SCALARPRODUCT 0
 ; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
+INIT_XMM sse2
 cglobal scalarproduct_int16, 3,3,3, v1, v2, order
 add orderd, orderd
 add v1q, orderq
@@ -42,16 +42,7 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order
 jl .loop
 HADDD   m2, m0
 movd   eax, m2
-%if mmsize == 8
-emms
-%endif
 RET
-%endmacro
-
-INIT_MMX mmxext
-SCALARPRODUCT
-INIT_XMM sse2
-SCALARPRODUCT
 
 
 ;-
@@ -117,8 +108,6 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
 REP_RET
 %endmacro
 
-INIT_MMX mmx
-VECTOR_CLIP_INT32 0, 1, 0, 0
 INIT_XMM sse2
 VECTOR_CLIP_INT32 6, 1, 0, 0, _int
 VECTOR_CLIP_INT32 6, 2, 0, 1
diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c
index 98e296c264..aa5e43e570 100644
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@@ -24,13 +24,9 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/audiodsp.h"
 
-int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
-  int order);
 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
 int order);
 
-void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src,
-  int32_t min, int32_t max, unsigned int len);
 void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len);
 void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
@@ -44,12 +40,6 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_MMX(cpu_flags))
-c->vector_clip_int32 = ff_vector_clip_int32_mmx;
-
-if (EXTERNAL_MMXEXT(cpu_flags))
-c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
-
 if (EXTERNAL_SSE(cpu_flags))
 c->vector_clipf = ff_vector_clipf_sse;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/mpegvideoenc: Remove obsolete MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Tue Jun  7 02:06:41 2022 +0200| [7284ab789d5fe271b9d6a1666ab5ea6be8724cca] | 
committer: Andreas Rheinhardt

avcodec/x86/mpegvideoenc: Remove obsolete MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7284ab789d5fe271b9d6a1666ab5ea6be8724cca
---

 libavcodec/x86/mpegvideoenc.c  | 95 +-
 libavcodec/x86/mpegvideoenc_template.c | 10 
 2 files changed, 1 insertion(+), 104 deletions(-)

diff --git a/libavcodec/x86/mpegvideoenc.c b/libavcodec/x86/mpegvideoenc.c
index 3691cce26c..612e7ff758 100644
--- a/libavcodec/x86/mpegvideoenc.c
+++ b/libavcodec/x86/mpegvideoenc.c
@@ -42,34 +42,9 @@ DECLARE_ALIGNED(16, static const uint16_t, 
inv_zigzag_direct16)[64] = {
 
 #if HAVE_6REGS
 
-#if HAVE_MMX_INLINE
-#define COMPILE_TEMPLATE_MMXEXT 0
-#define COMPILE_TEMPLATE_SSE2   0
-#define COMPILE_TEMPLATE_SSSE3  0
-#define RENAME(a)  a ## _mmx
-#define RENAME_FDCT(a) a ## _mmx
-#include "mpegvideoenc_template.c"
-#endif /* HAVE_MMX_INLINE */
-
-#if HAVE_MMXEXT_INLINE
-#undef COMPILE_TEMPLATE_SSSE3
-#undef COMPILE_TEMPLATE_SSE2
-#undef COMPILE_TEMPLATE_MMXEXT
-#define COMPILE_TEMPLATE_MMXEXT 1
-#define COMPILE_TEMPLATE_SSE2   0
-#define COMPILE_TEMPLATE_SSSE3  0
-#undef RENAME
-#undef RENAME_FDCT
-#define RENAME(a)  a ## _mmxext
-#define RENAME_FDCT(a) a ## _mmxext
-#include "mpegvideoenc_template.c"
-#endif /* HAVE_MMXEXT_INLINE */
-
 #if HAVE_SSE2_INLINE
-#undef COMPILE_TEMPLATE_MMXEXT
 #undef COMPILE_TEMPLATE_SSE2
 #undef COMPILE_TEMPLATE_SSSE3
-#define COMPILE_TEMPLATE_MMXEXT 0
 #define COMPILE_TEMPLATE_SSE2   1
 #define COMPILE_TEMPLATE_SSSE3  0
 #undef RENAME
@@ -80,10 +55,8 @@ DECLARE_ALIGNED(16, static const uint16_t, 
inv_zigzag_direct16)[64] = {
 #endif /* HAVE_SSE2_INLINE */
 
 #if HAVE_SSSE3_INLINE
-#undef COMPILE_TEMPLATE_MMXEXT
 #undef COMPILE_TEMPLATE_SSE2
 #undef COMPILE_TEMPLATE_SSSE3
-#define COMPILE_TEMPLATE_MMXEXT 0
 #define COMPILE_TEMPLATE_SSE2   1
 #define COMPILE_TEMPLATE_SSSE3  1
 #undef RENAME
@@ -96,62 +69,6 @@ DECLARE_ALIGNED(16, static const uint16_t, 
inv_zigzag_direct16)[64] = {
 #endif /* HAVE_6REGS */
 
 #if HAVE_INLINE_ASM
-#if HAVE_MMX_INLINE
-static void  denoise_dct_mmx(MpegEncContext *s, int16_t *block){
-const int intra= s->mb_intra;
-int *sum= s->dct_error_sum[intra];
-uint16_t *offset= s->dct_offset[intra];
-
-s->dct_count[intra]++;
-
-__asm__ volatile(
-"pxor %%mm7, %%mm7  \n\t"
-"1: \n\t"
-"pxor %%mm0, %%mm0  \n\t"
-"pxor %%mm1, %%mm1  \n\t"
-"movq (%0), %%mm2   \n\t"
-"movq 8(%0), %%mm3  \n\t"
-"pcmpgtw %%mm2, %%mm0   \n\t"
-"pcmpgtw %%mm3, %%mm1   \n\t"
-"pxor %%mm0, %%mm2  \n\t"
-"pxor %%mm1, %%mm3  \n\t"
-"psubw %%mm0, %%mm2 \n\t"
-"psubw %%mm1, %%mm3 \n\t"
-"movq %%mm2, %%mm4  \n\t"
-"movq %%mm3, %%mm5  \n\t"
-"psubusw (%2), %%mm2\n\t"
-"psubusw 8(%2), %%mm3   \n\t"
-"pxor %%mm0, %%mm2  \n\t"
-"pxor %%mm1, %%mm3  \n\t"
-"psubw %%mm0, %%mm2 \n\t"
-"psubw %%mm1, %%mm3 \n\t"
-"movq %%mm2, (%0)   \n\t"
-"movq %%mm3, 8(%0)  \n\t"
-"movq %%mm4, %%mm2  \n\t"
-"movq %%mm5, %%mm3  \n\t"
-"punpcklwd %%mm7, %%mm4 \n\t"
-"punpckhwd %%mm7, %%mm2 \n\t"
-"punpcklwd %%mm7, %%mm5 \n\t"
-"punpckhwd %%mm7, %%mm3 \n\t"
-"paddd (%1), %%mm4  \n\t"
-"paddd 8(%1), %%mm2 \n\t"
-"paddd 16(%1), %%mm5\n\t"
-"paddd 24(%1), %%mm3\n\t"
-"movq %%mm4, (%1)   \n\t"
-"movq %%mm2, 8(%1)  \n\t"
-"movq %%mm5, 16(%1) \n\t"
-"movq %%mm3, 24(%1) \n\t"
-"add $16, %0\n\t"
-"add $32, %1\n\t"
-"add $16, %2\n\t"
-"cmp %3, %0

[FFmpeg-cvslog] avcodec/x86/cavsdsp: Remove obsolete MMX(EXT), 3dnow functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Tue Jun  7 13:55:24 2022 +0200| [3221aba87989742ea22b639a7bb4af69f4eaa0e7] | 
committer: Andreas Rheinhardt

avcodec/x86/cavsdsp: Remove obsolete MMX(EXT), 3dnow functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3221aba87989742ea22b639a7bb4af69f4eaa0e7
---

 libavcodec/x86/cavsdsp.c| 71 +++--
 libavcodec/x86/cavsidct.asm | 62 ---
 2 files changed, 4 insertions(+), 129 deletions(-)

diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c
index f974f93fc0..7ceb51a23c 100644
--- a/libavcodec/x86/cavsdsp.c
+++ b/libavcodec/x86/cavsdsp.c
@@ -36,16 +36,7 @@
 #include "config.h"
 
 
-#if HAVE_MMX_EXTERNAL
-
-void ff_cavs_idct8_mmx(int16_t *out, const int16_t *in);
-
-static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, ptrdiff_t stride)
-{
-LOCAL_ALIGNED(16, int16_t, b2, [64]);
-ff_cavs_idct8_mmx(b2, block);
-ff_add_pixels_clamped_mmx(b2, dst, stride);
-}
+#if HAVE_SSE2_EXTERNAL
 
 void ff_cavs_idct8_sse2(int16_t *out, const int16_t *in);
 
@@ -56,9 +47,9 @@ static void cavs_idct8_add_sse2(uint8_t *dst, int16_t *block, 
ptrdiff_t stride)
 ff_add_pixels_clamped_sse2(b2, dst, stride);
 }
 
-#endif /* HAVE_MMX_EXTERNAL */
+#endif /* HAVE_SSE2_EXTERNAL */
 
-#if (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE)
+#if HAVE_MMXEXT_INLINE
 
 /*
  *
@@ -326,7 +317,7 @@ static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## 
MMX(uint8_t *dst, const uin
 "pavgb " #temp ", " #a "  \n\t"\
 "mov" #size " " #a ", " #b "  \n\t"
 
-#endif /* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */
+#endif /* HAVE_MMXEXT_INLINE */
 
 #if HAVE_MMX_EXTERNAL
 static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
@@ -335,36 +326,12 @@ static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const 
uint8_t *src,
 ff_put_pixels8_mmx(dst, src, stride, 8);
 }
 
-static void avg_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
-ptrdiff_t stride)
-{
-ff_avg_pixels8_mmx(dst, src, stride, 8);
-}
-
 static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride)
 {
 ff_avg_pixels8_mmxext(dst, src, stride, 8);
 }
 
-static void put_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src,
- ptrdiff_t stride)
-{
-ff_put_pixels16_mmx(dst, src, stride, 16);
-}
-
-static void avg_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src,
- ptrdiff_t stride)
-{
-ff_avg_pixels16_mmx(dst, src, stride, 16);
-}
-
-static void avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, const uint8_t *src,
-ptrdiff_t stride)
-{
-ff_avg_pixels16_mmxext(dst, src, stride, 16);
-}
-
 static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
   ptrdiff_t stride)
 {
@@ -382,13 +349,7 @@ static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c,
  AVCodecContext *avctx)
 {
 #if HAVE_MMX_EXTERNAL
-c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_mmx;
 c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx;
-c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmx;
-c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmx;
-
-c->cavs_idct8_add = cavs_idct8_add_mmx;
-c->idct_perm  = FF_IDCT_PERM_TRANSPOSE;
 #endif /* HAVE_MMX_EXTERNAL */
 }
 
@@ -408,25 +369,6 @@ CAVS_MC(avg_,  8, mmxext)
 CAVS_MC(avg_, 16, mmxext)
 #endif /* HAVE_MMXEXT_INLINE */
 
-#if HAVE_AMD3DNOW_INLINE
-QPEL_CAVS(put_,   PUT_OP, 3dnow)
-QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
-
-CAVS_MC(put_, 8, 3dnow)
-CAVS_MC(put_, 16,3dnow)
-CAVS_MC(avg_, 8, 3dnow)
-CAVS_MC(avg_, 16,3dnow)
-
-static av_cold void cavsdsp_init_3dnow(CAVSDSPContext *c,
-   AVCodecContext *avctx)
-{
-DSPFUNC(put, 0, 16, 3dnow);
-DSPFUNC(put, 1,  8, 3dnow);
-DSPFUNC(avg, 0, 16, 3dnow);
-DSPFUNC(avg, 1,  8, 3dnow);
-}
-#endif /* HAVE_AMD3DNOW_INLINE */
-
 av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
 {
 av_unused int cpu_flags = av_get_cpu_flags();
@@ -434,10 +376,6 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, 
AVCodecContext *avctx)
 if (X86_MMX(cpu_flags))
 cavsdsp_init_mmx(c, avctx);
 
-#if HAVE_AMD3DNOW_INLINE
-if (INLINE_AMD3DNOW(cpu_flags))
-cavsdsp_init_3dnow(c, 

[FFmpeg-cvslog] avcodec/x86/fdct: Remove obsolete MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Fri Jun 10 15:32:33 2022 +0200| [d402ec6be99dc82e263bad883e7c1c3d957343db] | 
committer: Andreas Rheinhardt

avcodec/x86/fdct: Remove obsolete MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d402ec6be99dc82e263bad883e7c1c3d957343db
---

 libavcodec/tests/x86/dct.c|   6 --
 libavcodec/x86/fdct.c | 221 +-
 libavcodec/x86/fdct.h |   2 -
 libavcodec/x86/fdctdsp_init.c |   6 --
 4 files changed, 1 insertion(+), 234 deletions(-)

diff --git a/libavcodec/tests/x86/dct.c b/libavcodec/tests/x86/dct.c
index b332c9642d..6e3d8f7c01 100644
--- a/libavcodec/tests/x86/dct.c
+++ b/libavcodec/tests/x86/dct.c
@@ -58,12 +58,6 @@ PR_WRAP(avx)
 #endif
 
 static const struct algo fdct_tab_arch[] = {
-#if HAVE_MMX_INLINE
-{ "MMX",ff_fdct_mmx,FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX },
-#endif
-#if HAVE_MMXEXT_INLINE
-{ "MMXEXT", ff_fdct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT },
-#endif
 #if HAVE_SSE2_INLINE
 { "SSE2",   ff_fdct_sse2,   FF_IDCT_PERM_NONE, AV_CPU_FLAG_SSE2 },
 #endif
diff --git a/libavcodec/x86/fdct.c b/libavcodec/x86/fdct.c
index 835fcc2b28..f4677ff4be 100644
--- a/libavcodec/x86/fdct.c
+++ b/libavcodec/x86/fdct.c
@@ -37,7 +37,7 @@
 #include "libavutil/x86/asm.h"
 #include "fdct.h"
 
-#if HAVE_MMX_INLINE
+#if HAVE_SSE2_INLINE
 
 //
 //
@@ -71,8 +71,6 @@ DECLARE_ALIGNED(16, static const int16_t, ocos_4_16)[8] = {
 
 DECLARE_ALIGNED(16, static const int16_t, fdct_one_corr)[8] = { X8(1) };
 
-DECLARE_ALIGNED(8, static const int32_t, fdct_r_row)[2] = {RND_FRW_ROW, 
RND_FRW_ROW };
-
 static const struct
 {
  DECLARE_ALIGNED(16, const int32_t, fdct_r_row_sse2)[4];
@@ -82,80 +80,6 @@ static const struct
 }};
 //DECLARE_ALIGNED(16, static const long, fdct_r_row_sse2)[4] = {RND_FRW_ROW, 
RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW};
 
-DECLARE_ALIGNED(8, static const int16_t, tab_frw_01234567)[] = {  // 
forward_dct coeff table
-  16384,   16384,   22725,   19266,
-  16384,   16384,   12873,4520,
-  21407,8867,   19266,   -4520,
-  -8867,  -21407,  -22725,  -12873,
-  16384,  -16384,   12873,  -22725,
- -16384,   16384,4520,   19266,
-   8867,  -21407,4520,  -12873,
-  21407,   -8867,   19266,  -22725,
-
-  22725,   22725,   31521,   26722,
-  22725,   22725,   17855,6270,
-  29692,   12299,   26722,   -6270,
- -12299,  -29692,  -31521,  -17855,
-  22725,  -22725,   17855,  -31521,
- -22725,   22725,6270,   26722,
-  12299,  -29692,6270,  -17855,
-  29692,  -12299,   26722,  -31521,
-
-  21407,   21407,   29692,   25172,
-  21407,   21407,   16819,5906,
-  27969,   11585,   25172,   -5906,
- -11585,  -27969,  -29692,  -16819,
-  21407,  -21407,   16819,  -29692,
- -21407,   21407,5906,   25172,
-  11585,  -27969,5906,  -16819,
-  27969,  -11585,   25172,  -29692,
-
-  19266,   19266,   26722,   22654,
-  19266,   19266,   15137,5315,
-  25172,   10426,   22654,   -5315,
- -10426,  -25172,  -26722,  -15137,
-  19266,  -19266,   15137,  -26722,
- -19266,   19266,5315,   22654,
-  10426,  -25172,5315,  -15137,
-  25172,  -10426,   22654,  -26722,
-
-  16384,   16384,   22725,   19266,
-  16384,   16384,   12873,4520,
-  21407,8867,   19266,   -4520,
-  -8867,  -21407,  -22725,  -12873,
-  16384,  -16384,   12873,  -22725,
- -16384,   16384,4520,   19266,
-   8867,  -21407,4520,  -12873,
-  21407,   -8867,   19266,  -22725,
-
-  19266,   19266,   26722,   22654,
-  19266,   19266,   15137,5315,
-  25172,   10426,   22654,   -5315,
- -10426,  -25172,  -26722,  -15137,
-  19266,  -19266,   15137,  -26722,
- -19266,   19266,5315,   22654,
-  10426,  -25172,5315,  -15137,
-  25172,  -10426,   22654,  -26722,
-
-  21407,   21407,   29692,   25172,
-  21407,   21407,   16819,5906,
-  27969,   11585,   25172,   -5906,
- -11585,  -27969,  -29692,  -16819,
-  21407,  -21407,   16819,  -29692,
- -21407,   21407,5906,   25172,
-  11585,  -27969,5906,  -16819,
-  27969,  -11585,   25172,  -29692,
-
-  22725,   22725,   31521,   26722,
-  22725,   22725,   17855,6270,
-  29692,   12299,   26722,   -6270,
- -12299,  -29692,  -31521,  -17855,
-  22725,  -22725,   17855,  -31521,
- -22725,   22725,6270,   26722,
-  12299,  -29692,6270,  -17855,
-  29692,  -12299,   26722,  -31521,
-};
-
 static const struct
 {
  DECLARE_ALIGNED(16, const int16_t, tab_frw_01234567_sse2)[256];
@@ -375,7 +299,6 @@ static av_always_inline void fdct_col_##cpu(const int16_t 
*in, int16_t *out, int
  

[FFmpeg-cvslog] avcodec/x86/h264_intrapred: Remove obsolete MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Tue Jun  7 15:48:09 2022 +0200| [9bc527126c0e3d8d57e40b33b08e91eb3adf4575] | 
committer: Andreas Rheinhardt

avcodec/x86/h264_intrapred: Remove obsolete MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9bc527126c0e3d8d57e40b33b08e91eb3adf4575
---

 libavcodec/x86/h264_intrapred.asm   | 715 +---
 libavcodec/x86/h264_intrapred_10bit.asm |  94 +
 libavcodec/x86/h264_intrapred_init.c|  77 
 3 files changed, 10 insertions(+), 876 deletions(-)

diff --git a/libavcodec/x86/h264_intrapred.asm 
b/libavcodec/x86/h264_intrapred.asm
index b36c198fbb..31840a1472 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -48,22 +48,6 @@ cextern pw_8
 ; void ff_pred16x16_vertical_8(uint8_t *src, ptrdiff_t stride)
 ;-
 
-INIT_MMX mmx
-cglobal pred16x16_vertical_8, 2,3
-sub   r0, r1
-mov   r2, 8
-movq mm0, [r0+0]
-movq mm1, [r0+8]
-.loop:
-movq [r0+r1*1+0], mm0
-movq [r0+r1*1+8], mm1
-movq [r0+r1*2+0], mm0
-movq [r0+r1*2+8], mm1
-lea   r0, [r0+r1*2]
-dec   r2
-jg .loop
-REP_RET
-
 INIT_XMM sse
 cglobal pred16x16_vertical_8, 2,3
 sub   r0, r1
@@ -114,8 +98,6 @@ cglobal pred16x16_horizontal_8, 2,3
 REP_RET
 %endmacro
 
-INIT_MMX mmx
-PRED16x16_H
 INIT_MMX mmxext
 PRED16x16_H
 INIT_XMM ssse3
@@ -154,14 +136,6 @@ cglobal pred16x16_dc_8, 2,7
 %endif
 SPLATB_REG m0, r2, m1
 
-%if mmsize==8
-mov   r3d, 8
-.loop:
-mova [r4+r1*0+0], m0
-mova [r4+r1*0+8], m0
-mova [r4+r1*1+0], m0
-mova [r4+r1*1+8], m0
-%else
 mov   r3d, 4
 .loop:
 mova [r4+r1*0], m0
@@ -169,15 +143,12 @@ cglobal pred16x16_dc_8, 2,7
 lea   r4, [r4+r1*2]
 mova [r4+r1*0], m0
 mova [r4+r1*1], m0
-%endif
 lea   r4, [r4+r1*2]
 dec   r3d
 jg .loop
 REP_RET
 %endmacro
 
-INIT_MMX mmxext
-PRED16x16_DC
 INIT_XMM sse2
 PRED16x16_DC
 INIT_XMM ssse3
@@ -187,47 +158,6 @@ PRED16x16_DC
 ; void ff_pred16x16_tm_vp8_8(uint8_t *src, ptrdiff_t stride)
 ;-
 
-%macro PRED16x16_TM 0
-cglobal pred16x16_tm_vp8_8, 2,5
-subr0, r1
-pxor  mm7, mm7
-movq  mm0, [r0+0]
-movq  mm2, [r0+8]
-movq  mm1, mm0
-movq  mm3, mm2
-punpcklbw mm0, mm7
-punpckhbw mm1, mm7
-punpcklbw mm2, mm7
-punpckhbw mm3, mm7
-movzx r3d, byte [r0-1]
-mov   r4d, 16
-.loop:
-movzx r2d, byte [r0+r1-1]
-sub   r2d, r3d
-movd  mm4, r2d
-SPLATWmm4, mm4, 0
-movq  mm5, mm4
-movq  mm6, mm4
-movq  mm7, mm4
-paddw mm4, mm0
-paddw mm5, mm1
-paddw mm6, mm2
-paddw mm7, mm3
-packuswb  mm4, mm5
-packuswb  mm6, mm7
-movq [r0+r1+0], mm4
-movq [r0+r1+8], mm6
-addr0, r1
-dec   r4d
-jg .loop
-REP_RET
-%endmacro
-
-INIT_MMX mmx
-PRED16x16_TM
-INIT_MMX mmxext
-PRED16x16_TM
-
 INIT_XMM sse2
 cglobal pred16x16_tm_vp8_8, 2,6,6
 sub  r0, r1
@@ -311,22 +241,6 @@ cglobal pred16x16_plane_%1_8, 2,9,7
 neg  r1   ; -stride
 
 movh m0, [r0+r1  -1]
-%if mmsize == 8
-pxor m4, m4
-movh m1, [r0+r1  +3 ]
-movh m2, [r0+r1  +8 ]
-movh m3, [r0+r1  +12]
-punpcklbwm0, m4
-punpcklbwm1, m4
-punpcklbwm2, m4
-punpcklbwm3, m4
-pmullw   m0, [pw_m8tom1  ]
-pmullw   m1, [pw_m8tom1+8]
-pmullw   m2, [pw_1to8]
-pmullw   m3, [pw_1to8  +8]
-paddwm0, m2
-paddwm1, m3
-%else ; mmsize == 16
 %if cpuflag(ssse3)
 movhps   m0, [r0+r1  +8]
 pmaddubswm0, [plane_shuf] ; H coefficients
@@ -340,21 +254,10 @@ cglobal pred16x16_plane_%1_8, 2,9,7
 paddwm0, m1
 %endif
 movhlps  m1, m0
-%endif
 paddwm0, m1
-%if cpuflag(mmxext)
 PSHUFLW  m1, m0, 0xE
-%elif cpuflag(mmx)
-mova m1, m0
-psrlqm1, 32
-%endif
 paddwm0, m1
-%if cpuflag(mmxext)
 PSHUFLW  m1, m0, 0x1
-%elif cpuflag(mmx)
-mova m1, m0
-psrlqm1, 16
-%endif
 paddwm0, m1   ; sum of H coefficients
 
 lea  r4, [r0+r2*8-1]
@@ -496,24 +399,10 @@ cglobal pred16x16_plane_%1_8, 2,9,7
 SWAP  0, 1
 %endif
 mova m2, m0
-%if mmsize == 8
-mova m5, m0
-%endif
 pmullw   m0, [pw_0to7]; 0*H, 1*H, ..., 7*H  (

[FFmpeg-cvslog] avcodec/x86/rv40dsp_init: Remove obsolete MMX(EXT), 3dnow functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Fri Jun 10 16:28:37 2022 +0200| [9abf906800155a3828e869f1820605872c2de6b4] | 
committer: Andreas Rheinhardt

avcodec/x86/rv40dsp_init: Remove obsolete MMX(EXT), 3dnow functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9abf906800155a3828e869f1820605872c2de6b4
---

 libavcodec/x86/h264_chromamc.asm |  2 --
 libavcodec/x86/rv40dsp.asm   | 20 -
 libavcodec/x86/rv40dsp_init.c| 47 
 3 files changed, 69 deletions(-)

diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index c7c4a2d4aa..ba6f4af3b0 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -448,9 +448,7 @@ chroma_mc2_mmx_func avg, h264
 
 INIT_MMX 3dnow
 chroma_mc8_mmx_func avg, h264, _rnd
-chroma_mc8_mmx_func avg, rv40
 chroma_mc4_mmx_func avg, h264
-chroma_mc4_mmx_func avg, rv40
 
 %macro chroma_mc8_ssse3_func 2-3
 cglobal %1_%2_chroma_mc8%3, 6, 7, 8
diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm
index bcad1aee80..f2ce236d44 100644
--- a/libavcodec/x86/rv40dsp.asm
+++ b/libavcodec/x86/rv40dsp.asm
@@ -230,20 +230,6 @@ cglobal %1_rv40_qpel_h, 6, 6+npicregs, 12, dst, dststride, 
src, srcstride, heigh
 REP_RET
 %endmacro
 
-%if ARCH_X86_32
-INIT_MMX  mmx
-FILTER_V  put
-FILTER_H  put
-
-INIT_MMX  mmxext
-FILTER_V  avg
-FILTER_H  avg
-
-INIT_MMX  3dnow
-FILTER_V  avg
-FILTER_H  avg
-%endif
-
 INIT_XMM  sse2
 FILTER_H  put
 FILTER_H  avg
@@ -481,12 +467,6 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8
 REP_RET
 %endmacro
 
-INIT_MMX mmxext
-RV40_WEIGHT   rnd,8, 3
-RV40_WEIGHT   rnd,   16, 4
-RV40_WEIGHT   nornd,  8, 3
-RV40_WEIGHT   nornd, 16, 4
-
 INIT_XMM sse2
 RV40_WEIGHT   rnd,8, 3
 RV40_WEIGHT   rnd,   16, 4
diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c
index 7a05ab14ad..a04c5a5449 100644
--- a/libavcodec/x86/rv40dsp_init.c
+++ b/libavcodec/x86/rv40dsp_init.c
@@ -44,15 +44,11 @@ void ff_put_rv40_chroma_mc8_mmx  (uint8_t *dst, uint8_t 
*src,
   ptrdiff_t stride, int h, int x, int y);
 void ff_avg_rv40_chroma_mc8_mmxext(uint8_t *dst, uint8_t *src,
ptrdiff_t stride, int h, int x, int y);
-void ff_avg_rv40_chroma_mc8_3dnow(uint8_t *dst, uint8_t *src,
-  ptrdiff_t stride, int h, int x, int y);
 
 void ff_put_rv40_chroma_mc4_mmx  (uint8_t *dst, uint8_t *src,
   ptrdiff_t stride, int h, int x, int y);
 void ff_avg_rv40_chroma_mc4_mmxext(uint8_t *dst, uint8_t *src,
ptrdiff_t stride, int h, int x, int y);
-void ff_avg_rv40_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src,
-  ptrdiff_t stride, int h, int x, int y);
 
 #define DECLARE_WEIGHT(opt) \
 void ff_rv40_weight_func_rnd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t 
*src2, \
@@ -63,7 +59,6 @@ void ff_rv40_weight_func_nornd_16_##opt(uint8_t *dst, uint8_t 
*src1, uint8_t *sr
 int w1, int w2, ptrdiff_t stride); \
 void ff_rv40_weight_func_nornd_8_##opt (uint8_t *dst, uint8_t *src1, uint8_t 
*src2, \
 int w1, int w2, ptrdiff_t stride);
-DECLARE_WEIGHT(mmxext)
 DECLARE_WEIGHT(sse2)
 DECLARE_WEIGHT(ssse3)
 
@@ -148,25 +143,6 @@ QPEL_MC_DECL(avg_, _ssse3)
 QPEL_MC_DECL(put_, _sse2)
 QPEL_MC_DECL(avg_, _sse2)
 
-#if ARCH_X86_32
-#undef LOOPSIZE
-#undef HCOFF
-#undef VCOFF
-#define LOOPSIZE  4
-#define HCOFF(x)  (64 * ((x) - 1))
-#define VCOFF(x)  (64 * ((x) - 1))
-
-QPEL_MC_DECL(put_, _mmx)
-
-#define ff_put_rv40_qpel_h_mmxext  ff_put_rv40_qpel_h_mmx
-#define ff_put_rv40_qpel_v_mmxext  ff_put_rv40_qpel_v_mmx
-QPEL_MC_DECL(avg_, _mmxext)
-
-#define ff_put_rv40_qpel_h_3dnow  ff_put_rv40_qpel_h_mmx
-#define ff_put_rv40_qpel_v_3dnow  ff_put_rv40_qpel_v_mmx
-QPEL_MC_DECL(avg_, _3dnow)
-#endif
-
 /** @{ */
 /** Set one function */
 #define QPEL_FUNC_SET(OP, SIZE, PH, PV, OPT)\
@@ -207,9 +183,6 @@ DEFINE_FN(avg, 16, ssse3)
 
 #if HAVE_MMX_INLINE
 DEFINE_FN(put, 8, mmx)
-DEFINE_FN(avg, 8, mmx)
-DEFINE_FN(put, 16, mmx)
-DEFINE_FN(avg, 16, mmx)
 #endif
 
 av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
@@ -218,10 +191,7 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
 
 #if HAVE_MMX_INLINE
 if (INLINE_MMX(cpu_flags)) {
-c->put_pixels_tab[0][15] = put_rv40_qpel16_mc33_mmx;
 c->put_pixels_tab[1][15] = put_rv40_qpel8_mc33_mmx;
-c->avg_pixels_tab[0][15] = avg_rv40_qpel16_mc33_mmx;
-c->avg_pi

[FFmpeg-cvslog] avfilter/x86/vf_noise: Remove obsolete MMX function

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Tue Jun  7 22:26:33 2022 +0200| [c5dd2fdc090fab7e2d0edce4fb3b463028c5fafb] | 
committer: Andreas Rheinhardt

avfilter/x86/vf_noise: Remove obsolete MMX function

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from line_noise_mmx are truly ancient 32bit x86s
it is removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c5dd2fdc090fab7e2d0edce4fb3b463028c5fafb
---

 libavfilter/x86/vf_noise.c | 29 -
 1 file changed, 29 deletions(-)

diff --git a/libavfilter/x86/vf_noise.c b/libavfilter/x86/vf_noise.c
index f7a4d00336..43387c9908 100644
--- a/libavfilter/x86/vf_noise.c
+++ b/libavfilter/x86/vf_noise.c
@@ -25,34 +25,6 @@
 #include "libavfilter/vf_noise.h"
 
 #if HAVE_INLINE_ASM
-static void line_noise_mmx(uint8_t *dst, const uint8_t *src,
-   const int8_t *noise, int len, int shift)
-{
-x86_reg mmx_len= len & (~7);
-noise += shift;
-
-__asm__ volatile(
-"mov %3, %%"FF_REG_a"\n\t"
-"pcmpeqb %%mm7, %%mm7\n\t"
-"psllw $15, %%mm7\n\t"
-"packsswb %%mm7, %%mm7   \n\t"
-".p2align 4  \n\t"
-"1:  \n\t"
-"movq (%0, %%"FF_REG_a"), %%mm0  \n\t"
-"movq (%1, %%"FF_REG_a"), %%mm1  \n\t"
-"pxor %%mm7, %%mm0   \n\t"
-"paddsb %%mm1, %%mm0 \n\t"
-"pxor %%mm7, %%mm0   \n\t"
-"movq %%mm0, (%2, %%"FF_REG_a")  \n\t"
-"add $8, %%"FF_REG_a"\n\t"
-" js 1b  \n\t"
-:: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" 
(-mmx_len)
-: "%"FF_REG_a
-);
-if (mmx_len != len)
-ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 
0);
-}
-
 #if HAVE_6REGS
 static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
   int len, const int8_t * const *shift)
@@ -132,7 +104,6 @@ av_cold void ff_noise_init_x86(NoiseContext *n)
 int cpu_flags = av_get_cpu_flags();
 
 if (INLINE_MMX(cpu_flags)) {
-n->line_noise = line_noise_mmx;
 #if HAVE_6REGS
 n->line_noise_avg = line_noise_avg_mmx;
 #endif

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/hevcdsp_init: Remove obsolete MMXEXT functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Fri Jun 10 15:47:57 2022 +0200| [338f8fd2324385fdabba7de08de25885ade2edc1] | 
committer: Andreas Rheinhardt

avcodec/x86/hevcdsp_init: Remove obsolete MMXEXT functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=338f8fd2324385fdabba7de08de25885ade2edc1
---

 libavcodec/x86/hevc_idct.asm  | 1 -
 libavcodec/x86/hevcdsp_init.c | 4 
 2 files changed, 5 deletions(-)

diff --git a/libavcodec/x86/hevc_idct.asm b/libavcodec/x86/hevc_idct.asm
index 1eb1973f27..ce41f33822 100644
--- a/libavcodec/x86/hevc_idct.asm
+++ b/libavcodec/x86/hevc_idct.asm
@@ -811,7 +811,6 @@ cglobal hevc_idct_32x32_%1, 1, 6, 16, 256, coeffs
 %macro INIT_IDCT_DC 1
 INIT_MMX mmxext
 IDCT_DC_NL  4,  %1
-IDCT_DC 8,  2,  %1
 
 INIT_XMM sse2
 IDCT_DC_NL  8,  %1
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index 48f48a925f..f7a5b28bec 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -64,7 +64,6 @@ void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
 void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)
 
 IDCT_DC_FUNCS(4x4,   mmxext);
-IDCT_DC_FUNCS(8x8,   mmxext);
 IDCT_DC_FUNCS(8x8,   sse2);
 IDCT_DC_FUNCS(16x16, sse2);
 IDCT_DC_FUNCS(32x32, sse2);
@@ -712,7 +711,6 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int 
bit_depth)
 if (bit_depth == 8) {
 if (EXTERNAL_MMXEXT(cpu_flags)) {
 c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
-c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_mmxext;
 
 c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext;
 }
@@ -889,7 +887,6 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int 
bit_depth)
 if (EXTERNAL_MMXEXT(cpu_flags)) {
 c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext;
 c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
-c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_mmxext;
 }
 if (EXTERNAL_SSE2(cpu_flags)) {
 c->hevc_v_loop_filter_chroma = 
ff_hevc_v_loop_filter_chroma_10_sse2;
@@ -1105,7 +1102,6 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int 
bit_depth)
 } else if (bit_depth == 12) {
 if (EXTERNAL_MMXEXT(cpu_flags)) {
 c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext;
-c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_mmxext;
 }
 if (EXTERNAL_SSE2(cpu_flags)) {
 c->hevc_v_loop_filter_chroma = 
ff_hevc_v_loop_filter_chroma_12_sse2;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/mpegvideoencdsp: Remove obsolete MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Sat Jun 11 04:09:24 2022 +0200| [55d8618a4772513125678c9098fa6ca77dfdb3c3] | 
committer: Andreas Rheinhardt

avcodec/x86/mpegvideoencdsp: Remove obsolete MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=55d8618a4772513125678c9098fa6ca77dfdb3c3
---

 libavcodec/x86/mpegvideoencdsp.asm| 49 ++-
 libavcodec/x86/mpegvideoencdsp_init.c | 14 --
 2 files changed, 2 insertions(+), 61 deletions(-)

diff --git a/libavcodec/x86/mpegvideoencdsp.asm 
b/libavcodec/x86/mpegvideoencdsp.asm
index aec73f82dc..e3d88f01c4 100644
--- a/libavcodec/x86/mpegvideoencdsp.asm
+++ b/libavcodec/x86/mpegvideoencdsp.asm
@@ -23,21 +23,15 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_RODATA
-
-cextern pw_1
-
 SECTION .text
-; int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
+; int ff_pix_sum16(uint8_t *pix, int line_size)
 ; %1 = number of loops
 ; %2 = number of GPRs used
 %macro PIX_SUM16 3
 cglobal pix_sum16, 2, %2, 6
 movsxdifnidn r1, r1d
 mov  r2, %1
-%if mmsize == 16
 lea  r3, [r1*3]
-%endif
 %if notcpuflag(xop)
 pxor m5, m5
 %endif
@@ -50,56 +44,27 @@ cglobal pix_sum16, 2, %2, 6
 vphaddubqm3, [r0+r3]
 %else
 mova m0, [r0]
-%if mmsize == 8
-mova m1, [r0+8]
-%if cpuflag(mmxext)
-mova m2, [r0+r1]
-mova m3, [r0+r1+8]
-%endif
-%else ; sse2
 mova m1, [r0+r1]
 mova m2, [r0+r1*2]
 mova m3, [r0+r3]
-%endif
-%if cpuflag(mmxext)
 psadbw   m0, m5
 psadbw   m1, m5
 psadbw   m2, m5
 psadbw   m3, m5
-%else ; mmx
-punpckhbwm2, m0, m5
-punpcklbwm0, m5
-punpckhbwm3, m1, m5
-punpcklbwm1, m5
-%endif ; cpuflag(mmxext)
 %endif ; cpuflag(xop)
 paddwm1, m0
 paddwm3, m2
 paddwm3, m1
 paddwm4, m3
-%if cpuflag(mmxext)
 lea  r0, [r0+r1*%3]
-%else
-add  r0, r1
-%endif
 dec r2
 jne .loop
-%if mmsize == 16
 pshufd   m0, m4, q0032
 padddm4, m0
-%elif notcpuflag(mmxext)
-HADDWm4, m5
-%endif
 movdeax, m4
 RET
 %endmacro
 
-%if ARCH_X86_32
-INIT_MMX mmx
-PIX_SUM16 16, 3, 0
-INIT_MMX mmxext
-PIX_SUM16  8, 4, 2
-%endif
 INIT_XMM sse2
 PIX_SUM16  4, 4, 4
 %if HAVE_XOP_EXTERNAL
@@ -107,7 +72,7 @@ INIT_XMM xop
 PIX_SUM16  4, 4, 4
 %endif
 
-; int ff_pix_norm1_mmx(uint8_t *pix, int line_size)
+; int ff_pix_norm1(uint8_t *pix, int line_size)
 ; %1 = number of xmm registers used
 ; %2 = number of loops
 %macro PIX_NORM1 2
@@ -118,11 +83,7 @@ cglobal pix_norm1, 2, 3, %1
 pxor m5, m5
 .loop:
 mova m2, [r0+0]
-%if mmsize == 8
-mova m3, [r0+8]
-%else
 mova m3, [r0+r1]
-%endif
 punpckhbwm1, m2, m0
 punpcklbwm2, m0
 punpckhbwm4, m3, m0
@@ -135,11 +96,7 @@ cglobal pix_norm1, 2, 3, %1
 padddm4, m3
 padddm5, m2
 padddm5, m4
-%if mmsize == 8
-add  r0, r1
-%else
 lea  r0, [r0+r1*2]
-%endif
 dec r2
 jne .loop
 HADDDm5, m1
@@ -147,8 +104,6 @@ cglobal pix_norm1, 2, 3, %1
 RET
 %endmacro
 
-INIT_MMX mmx
-PIX_NORM1 0, 16
 INIT_XMM sse2
 PIX_NORM1 6, 8
 
diff --git a/libavcodec/x86/mpegvideoencdsp_init.c 
b/libavcodec/x86/mpegvideoencdsp_init.c
index 532836cec9..b9c80b5382 100644
--- a/libavcodec/x86/mpegvideoencdsp_init.c
+++ b/libavcodec/x86/mpegvideoencdsp_init.c
@@ -23,11 +23,8 @@
 #include "libavcodec/avcodec.h"
 #include "libavcodec/mpegvideoencdsp.h"
 
-int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
-int ff_pix_sum16_mmxext(uint8_t *pix, int line_size);
 int ff_pix_sum16_sse2(uint8_t *pix, int line_size);
 int ff_pix_sum16_xop(uint8_t *pix, int line_size);
-int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
 int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
 
 #if HAVE_INLINE_ASM
@@ -219,17 +216,6 @@ av_cold void 
ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
 {
 int cpu_flags = av_get_cpu_flags();
 
-#if ARCH_X86_32
-if (EXTERNAL_MMX(cpu_flags)) {
-c->pix_sum   = ff_pix_sum16_mmx;
-c->pix_norm1 = ff_pix_norm1_mmx;
-}
-
-if (EXTERNAL_MMXEXT(cpu_flags)) {
-c->pix_sum = ff_pix_sum16_mmxext;
-}
-#endif
-
 if (EXTERNAL_SSE2(cpu_flags)) {
 c->pix_sum = ff_pix_sum16_sse2;
 c->pix_norm1   = ff_pix_norm1_sse2;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsub

[FFmpeg-cvslog] avcodec/x86/me_cmp: Remove obsolete MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Tue Jun  7 23:34:42 2022 +0200| [542765ce3eccbca587d54262a512cbdb1407230d] | 
committer: Andreas Rheinhardt

avcodec/x86/me_cmp: Remove obsolete MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=542765ce3eccbca587d54262a512cbdb1407230d
---

 libavcodec/x86/me_cmp.asm|   9 +-
 libavcodec/x86/me_cmp_init.c | 349 +--
 2 files changed, 6 insertions(+), 352 deletions(-)

diff --git a/libavcodec/x86/me_cmp.asm b/libavcodec/x86/me_cmp.asm
index ad06d485ab..10809bbfb1 100644
--- a/libavcodec/x86/me_cmp.asm
+++ b/libavcodec/x86/me_cmp.asm
@@ -261,11 +261,10 @@ hadamard8_16_wrapper 0, 14
 %endif
 %endmacro
 
-INIT_MMX mmx
-HADAMARD8_DIFF
-
+%if HAVE_ALIGNED_STACK == 0
 INIT_MMX mmxext
 HADAMARD8_DIFF
+%endif
 
 INIT_XMM sse2
 %if ARCH_X86_64
@@ -385,10 +384,6 @@ cglobal sum_abs_dctelem, 1, 1, %1, block
 RET
 %endmacro
 
-INIT_MMX mmx
-SUM_ABS_DCTELEM 0, 4
-INIT_MMX mmxext
-SUM_ABS_DCTELEM 0, 4
 INIT_XMM sse2
 SUM_ABS_DCTELEM 7, 2
 INIT_XMM ssse3
diff --git a/libavcodec/x86/me_cmp_init.c b/libavcodec/x86/me_cmp_init.c
index 9af911bb88..61e9396b8f 100644
--- a/libavcodec/x86/me_cmp_init.c
+++ b/libavcodec/x86/me_cmp_init.c
@@ -30,8 +30,6 @@
 #include "libavcodec/me_cmp.h"
 #include "libavcodec/mpegvideo.h"
 
-int ff_sum_abs_dctelem_mmx(int16_t *block);
-int ff_sum_abs_dctelem_mmxext(int16_t *block);
 int ff_sum_abs_dctelem_sse2(int16_t *block);
 int ff_sum_abs_dctelem_ssse3(int16_t *block);
 int ff_sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
@@ -85,7 +83,6 @@ int ff_vsad16_approx_sse2(MpegEncContext *v, uint8_t *pix1, 
uint8_t *pix2,
 int ff_hadamard8_diff16_ ## cpu(MpegEncContext *s, uint8_t *src1, \
 uint8_t *src2, ptrdiff_t stride, int h);
 
-hadamard_func(mmx)
 hadamard_func(mmxext)
 hadamard_func(sse2)
 hadamard_func(ssse3)
@@ -126,232 +123,12 @@ static int nsse8_mmx(MpegEncContext *c, uint8_t *pix1, 
uint8_t *pix2,
 
 #if HAVE_INLINE_ASM
 
-static int vsad_intra16_mmx(MpegEncContext *v, uint8_t *pix, uint8_t *dummy,
-ptrdiff_t stride, int h)
-{
-int tmp;
-
-av_assert2(((uintptr_t) pix & 7) == 0);
-av_assert2((stride & 7) == 0);
-
-#define SUM(in0, in1, out0, out1)   \
-"movq (%0), %%mm2\n"\
-"movq 8(%0), %%mm3\n"   \
-"add %2,%0\n"   \
-"movq %%mm2, " #out0 "\n"   \
-"movq %%mm3, " #out1 "\n"   \
-"psubusb " #in0 ", %%mm2\n" \
-"psubusb " #in1 ", %%mm3\n" \
-"psubusb " #out0 ", " #in0 "\n" \
-"psubusb " #out1 ", " #in1 "\n" \
-"por %%mm2, " #in0 "\n" \
-"por %%mm3, " #in1 "\n" \
-"movq " #in0 ", %%mm2\n"\
-"movq " #in1 ", %%mm3\n"\
-"punpcklbw %%mm7, " #in0 "\n"   \
-"punpcklbw %%mm7, " #in1 "\n"   \
-"punpckhbw %%mm7, %%mm2\n"  \
-"punpckhbw %%mm7, %%mm3\n"  \
-"paddw " #in1 ", " #in0 "\n"\
-"paddw %%mm3, %%mm2\n"  \
-"paddw %%mm2, " #in0 "\n"   \
-"paddw " #in0 ", %%mm6\n"
-
-
-__asm__ volatile (
-"movl%3, %%ecx\n"
-"pxor %%mm6, %%mm6\n"
-"pxor %%mm7, %%mm7\n"
-"movq  (%0), %%mm0\n"
-"movq 8(%0), %%mm1\n"
-"add %2, %0\n"
-"jmp 2f\n"
-"1:\n"
-
-SUM(%%mm4, %%mm5, %%mm0, %%mm1)
-"2:\n"
-SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-
-"subl $2, %%ecx\n"
-"jnz 1b\n"
-
-"movq  %%mm6, %%mm0\n"
-"psrlq $32,   %%mm6\n"
-"paddw %%mm6, %%mm0\n"
-"movq  %%mm0, %%mm6\n"
-"psrlq $16,   %%mm0\n"
-"paddw %%mm6, %%mm0\n"
-"movd  %%mm0, %1\n"
-: "+r" (pix), "=r" (tmp)
-: "r" (stride), "m" (h)
-: "%ecx");
-
-return tmp & 0xFFFF;
-}
-#undef SUM
-
-static int vsad16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-  ptrdiff_t stride, int h)
-{
-int tmp;
-
-av_assert2(((uintptr_t)pix1 & 7) == 0);
-av_assert2(((uintptr_t)pix2 & 7) == 0);
-av_assert2((stride & 7) == 0);
-
-#define SUM(in0, in1, out0, out1)   \
-"movq (%0), %%mm2\n"\
-"movq (%1), " #out0 "\n"\
-"movq 8(%0), %%mm3\n"   \
-"movq 8(%1), " #out1 "\n"   \
-"add %3, %0\n"  

[FFmpeg-cvslog] avcodec/x86/h264dsp_init: Remove obsolete MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Sat Jun 11 16:24:23 2022 +0200| [4618f36a2424a3a4d5760afabc2e9dd18d73f0a4] | 
committer: Andreas Rheinhardt

avcodec/x86/h264dsp_init: Remove obsolete MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4618f36a2424a3a4d5760afabc2e9dd18d73f0a4
---

 libavcodec/x86/h264_deblock.asm   | 196 -
 libavcodec/x86/h264_deblock_10bit.asm |  42 +---
 libavcodec/x86/h264_idct.asm  | 382 --
 libavcodec/x86/h264_weight.asm|  36 
 libavcodec/x86/h264dsp_init.c |  95 +
 5 files changed, 9 insertions(+), 742 deletions(-)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index a2e745cd8e..479e6c3460 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -581,8 +581,6 @@ cglobal deblock_h_luma_8, 0,5,8,0x60+12
 RET
 %endmacro ; DEBLOCK_LUMA
 
-INIT_MMX mmxext
-DEBLOCK_LUMA v8, 8
 INIT_XMM sse2
 DEBLOCK_LUMA v, 16
 %if HAVE_AVX_EXTERNAL
@@ -864,200 +862,6 @@ DEBLOCK_LUMA_INTRA v
 INIT_XMM avx
 DEBLOCK_LUMA_INTRA v
 %endif
-%if ARCH_X86_64 == 0
-INIT_MMX mmxext
-DEBLOCK_LUMA_INTRA v8
-%endif
-
-INIT_MMX mmxext
-
-%macro CHROMA_V_START 0
-decr2d  ; alpha-1
-decr3d  ; beta-1
-movt5, r0
-subt5, r1
-subt5, r1
-%endmacro
-
-%macro CHROMA_H_START 0
-decr2d
-decr3d
-subr0, 2
-leat6, [r1*3]
-movt5, r0
-addr0, t6
-%endmacro
-
-%define t5 r5
-%define t6 r6
-
-;-
-; void ff_deblock_v_chroma(uint8_t *pix, int stride, int alpha, int beta,
-;  int8_t *tc0)
-;-
-cglobal deblock_v_chroma_8, 5,6
-CHROMA_V_START
-movq  m0, [t5]
-movq  m1, [t5+r1]
-movq  m2, [r0]
-movq  m3, [r0+r1]
-call ff_chroma_inter_body_mmxext
-movq  [t5+r1], m1
-movq  [r0], m2
-RET
-
-;-
-; void ff_deblock_h_chroma(uint8_t *pix, int stride, int alpha, int beta,
-;  int8_t *tc0)
-;-
-cglobal deblock_h_chroma_8, 5,7
-%if ARCH_X86_64
-; This could use the red zone on 64 bit unix to avoid the stack pointer
-; readjustment, but valgrind assumes the red zone is clobbered on
-; function calls and returns.
-sub   rsp, 16
-%define buf0 [rsp]
-%define buf1 [rsp+8]
-%else
-%define buf0 r0m
-%define buf1 r2m
-%endif
-CHROMA_H_START
-TRANSPOSE4x8_LOAD  bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
-movq  buf0, m0
-movq  buf1, m3
-LOAD_MASK  r2d, r3d
-movd   m6, [r4] ; tc0
-punpcklbw  m6, m6
-pand   m7, m6
-DEBLOCK_P0_Q0
-movq  m0, buf0
-movq  m3, buf1
-TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
-%if ARCH_X86_64
-add   rsp, 16
-%endif
-RET
-
-ALIGN 16
-ff_chroma_inter_body_mmxext:
-LOAD_MASK  r2d, r3d
-movd   m6, [r4] ; tc0
-punpcklbw  m6, m6
-pand   m7, m6
-DEBLOCK_P0_Q0
-ret
-
-%define t5 r4
-%define t6 r5
-
-cglobal deblock_h_chroma422_8, 5, 6
-SUB rsp, (1+ARCH_X86_64*2)*mmsize
-%if ARCH_X86_64
-%define buf0 [rsp+16]
-%define buf1 [rsp+8]
-%else
-%define buf0 r0m
-%define buf1 r2m
-%endif
-
-movd m6, [r4]
-punpcklbw m6, m6
-movq [rsp], m6
-CHROMA_H_START
-
-TRANSPOSE4x8B_LOAD PASS8ROWS(t5, r0, r1, t6)
-movq buf0, m0
-movq buf1, m3
-LOAD_MASK r2d, r3d
-movd m6, [rsp]
-punpcklwd m6, m6
-pand m7, m6
-DEBLOCK_P0_Q0
-movq m0, buf0
-movq m3, buf1
-TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
-
-lea r0, [r0+r1*8]
-lea t5, [t5+r1*8]
-
-TRANSPOSE4x8B_LOAD PASS8ROWS(t5, r0, r1, t6)
-movq buf0, m0
-movq buf1, m3
-LOAD_MASK r2d, r3d
-movd m6, [rsp+4]
-punpcklwd m6, m6
-pand m7, m6
-DEBLOCK_P0_Q0
-movq m0, buf0
-movq m3, buf1
-TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
-ADD rsp, (1+ARCH_X86_64*2)*mmsize
-RET
-
-; in: %1=p0 %2=p1 %3=q1
-; out: p0 = (p0 + q1 + 2*p1 + 2) >> 2
-%macro CHROMA_INTRA_P0 3
-movqm4, %1
-pxorm4, %3
-pandm4, [pb_1] ; m4 = (p0^q1)&1
-pavgb   %1, %3
-psubusb %1, m4
-pavgb   %1, %2 ; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1))
-%endmacro
-
-;

[FFmpeg-cvslog] avcodec/x86/sbrdsp: Remove obsolete SSE function

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Fri Jun 10 20:10:45 2022 +0200| [3d151bab323271f476b21d5d84a8cdd6dc20a760] | 
committer: Andreas Rheinhardt

avcodec/x86/sbrdsp: Remove obsolete SSE function

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from ff_sbr_qmf_deint_bfly_sse are truly ancient 32bit x86s
it is removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3d151bab323271f476b21d5d84a8cdd6dc20a760
---

 libavcodec/x86/sbrdsp.asm| 16 +---
 libavcodec/x86/sbrdsp_init.c |  2 --
 2 files changed, 1 insertion(+), 17 deletions(-)

diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index 62bbe512ec..87dcdc43ce 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -251,7 +251,7 @@ cglobal sbr_neg_odd_64, 1,2,4,z
 REP_RET
 
 ; void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float 
*src1)
-%macro SBR_QMF_DEINT_BFLY  0
+INIT_XMM sse2
 cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c
 mov   cq, 64*4-2*mmsize
 leavrevq, [vq + 64*4]
@@ -260,17 +260,10 @@ cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c
 mova  m1, [src1q]
 mova  m4, [src0q+cq+mmsize]
 mova  m5, [src1q+mmsize]
-%if cpuflag(sse2)
 pshufdm2, m0, q0123
 pshufdm3, m1, q0123
 pshufdm6, m4, q0123
 pshufdm7, m5, q0123
-%else
-shufpsm2, m0, m0, q0123
-shufpsm3, m1, m1, q0123
-shufpsm6, m4, m4, q0123
-shufpsm7, m5, m5, q0123
-%endif
 addps m5, m2
 subps m0, m7
 addps m1, m6
@@ -284,13 +277,6 @@ cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c
 sub   cq, 2*mmsize
 jge.loop
 REP_RET
-%endmacro
-
-INIT_XMM sse
-SBR_QMF_DEINT_BFLY
-
-INIT_XMM sse2
-SBR_QMF_DEINT_BFLY
 
 INIT_XMM sse2
 cglobal sbr_qmf_pre_shuffle, 1,4,6,z
diff --git a/libavcodec/x86/sbrdsp_init.c b/libavcodec/x86/sbrdsp_init.c
index 6911a1a515..999f681220 100644
--- a/libavcodec/x86/sbrdsp_init.c
+++ b/libavcodec/x86/sbrdsp_init.c
@@ -34,7 +34,6 @@ void ff_sbr_hf_gen_sse(float (*X_high)[2], const float 
(*X_low)[2],
float bw, int start, int end);
 void ff_sbr_neg_odd_64_sse(float *z);
 void ff_sbr_qmf_post_shuffle_sse(float W[32][2], const float *z);
-void ff_sbr_qmf_deint_bfly_sse(float *v, const float *src0, const float *src1);
 void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float 
*src1);
 void ff_sbr_qmf_pre_shuffle_sse2(float *z);
 
@@ -67,7 +66,6 @@ av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
 s->hf_g_filt  = ff_sbr_hf_g_filt_sse;
 s->hf_gen = ff_sbr_hf_gen_sse;
 s->qmf_post_shuffle = ff_sbr_qmf_post_shuffle_sse;
-s->qmf_deint_bfly   = ff_sbr_qmf_deint_bfly_sse;
 s->qmf_deint_neg= ff_sbr_qmf_deint_neg_sse;
 s->autocorrelate= ff_sbr_autocorrelate_sse;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/xvididct: Remove obsolete MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Fri Jun 10 20:47:39 2022 +0200| [b2437a45af58b0a9d726f1ee082e7d2809175b99] | 
committer: Andreas Rheinhardt

avcodec/x86/xvididct: Remove obsolete MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b2437a45af58b0a9d726f1ee082e7d2809175b99
---

 libavcodec/tests/x86/dct.c |   4 -
 libavcodec/x86/xvididct.asm| 411 -
 libavcodec/x86/xvididct.h  |   8 -
 libavcodec/x86/xvididct_init.c |  43 -
 4 files changed, 466 deletions(-)

diff --git a/libavcodec/tests/x86/dct.c b/libavcodec/tests/x86/dct.c
index 6e3d8f7c01..207a2bcb36 100644
--- a/libavcodec/tests/x86/dct.c
+++ b/libavcodec/tests/x86/dct.c
@@ -69,10 +69,6 @@ static const struct algo idct_tab_arch[] = {
 { "SIMPLE-MMX",  ff_simple_idct_mmx,  FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX 
},
 #endif
 #if CONFIG_MPEG4_DECODER && HAVE_X86ASM
-#if ARCH_X86_32
-{ "XVID-MMX",ff_xvid_idct_mmx,FF_IDCT_PERM_NONE,   
AV_CPU_FLAG_MMX,1 },
-{ "XVID-MMXEXT", ff_xvid_idct_mmxext, FF_IDCT_PERM_NONE,   
AV_CPU_FLAG_MMXEXT, 1 },
-#endif
 #if HAVE_SSE2_EXTERNAL
 { "XVID-SSE2",   ff_xvid_idct_sse2,   FF_IDCT_PERM_SSE2,   
AV_CPU_FLAG_SSE2,   1 },
 #endif
diff --git a/libavcodec/x86/xvididct.asm b/libavcodec/x86/xvididct.asm
index 0220885da6..4197551cdf 100644
--- a/libavcodec/x86/xvididct.asm
+++ b/libavcodec/x86/xvididct.asm
@@ -91,145 +91,6 @@ iTab4:  dw 0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 
0x4b42, 0xd746
 dw 0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df
 dw 0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e
 
-%if ARCH_X86_32
-; -
-;
-; The first stage iDCT 8x8 - inverse DCTs of rows
-;
-; -
-; The 8-point inverse DCT direct algorithm
-; -
-;
-; static const short w[32] = {
-; FIX(cos_4_16),  FIX(cos_2_16),  FIX(cos_4_16),  FIX(cos_6_16),
-; FIX(cos_4_16),  FIX(cos_6_16), -FIX(cos_4_16), -FIX(cos_2_16),
-; FIX(cos_4_16), -FIX(cos_6_16), -FIX(cos_4_16),  FIX(cos_2_16),
-; FIX(cos_4_16), -FIX(cos_2_16),  FIX(cos_4_16), -FIX(cos_6_16),
-; FIX(cos_1_16),  FIX(cos_3_16),  FIX(cos_5_16),  FIX(cos_7_16),
-; FIX(cos_3_16), -FIX(cos_7_16), -FIX(cos_1_16), -FIX(cos_5_16),
-; FIX(cos_5_16), -FIX(cos_1_16),  FIX(cos_7_16),  FIX(cos_3_16),
-; FIX(cos_7_16), -FIX(cos_5_16),  FIX(cos_3_16), -FIX(cos_1_16) };
-;
-; #define DCT_8_INV_ROW(x, y)
-; {
-; int a0, a1, a2, a3, b0, b1, b2, b3;
-;
-; a0 = x[0] * w[0]  + x[2] * w[1]  + x[4] * w[2]  + x[6] * w[3];
-; a1 = x[0] * w[4]  + x[2] * w[5]  + x[4] * w[6]  + x[6] * w[7];
-; a2 = x[0] * w[8]  + x[2] * w[9]  + x[4] * w[10] + x[6] * w[11];
-; a3 = x[0] * w[12] + x[2] * w[13] + x[4] * w[14] + x[6] * w[15];
-; b0 = x[1] * w[16] + x[3] * w[17] + x[5] * w[18] + x[7] * w[19];
-; b1 = x[1] * w[20] + x[3] * w[21] + x[5] * w[22] + x[7] * w[23];
-; b2 = x[1] * w[24] + x[3] * w[25] + x[5] * w[26] + x[7] * w[27];
-; b3 = x[1] * w[28] + x[3] * w[29] + x[5] * w[30] + x[7] * w[31];
-;
-; y[0] = SHIFT_ROUND(a0 + b0);
-; y[1] = SHIFT_ROUND(a1 + b1);
-; y[2] = SHIFT_ROUND(a2 + b2);
-; y[3] = SHIFT_ROUND(a3 + b3);
-; y[4] = SHIFT_ROUND(a3 - b3);
-; y[5] = SHIFT_ROUND(a2 - b2);
-; y[6] = SHIFT_ROUND(a1 - b1);
-; y[7] = SHIFT_ROUND(a0 - b0);
-; }
-;
-; -
-;
-; In this implementation the outputs of the iDCT-1D are multiplied
-; for rows 0,4 - by cos_4_16,
-; for rows 1,7 - by cos_1_16,
-; for rows 2,6 - by cos_2_16,
-; for rows 3,5 - by cos_3_16
-; and are shifted to the left for better accuracy.
-;
-; For the constants used,
-; FIX(float_const) = (short) (float_const * (1 << 15) + 0.5)
-;
-; -
-
-; -
-; Tables for mmx processors
-; -
-
-; Table for rows 0,4 - constants are multiplied by cos_4_16
-tab_i_04_mmx: dw  16384,  16384,  16384, -16384
-  dw  21407,   8867,   8867, -21407 ; w07 w05 w03 w01
-  dw  16384, -16384,  16384,  16384 ; w14 w12 w10 w08
-  dw  -8867,  21407, -21407,  -8867 ; w15 w13 w11 w09
-  dw  22725,  12873,  192

[FFmpeg-cvslog] avcodec/x86/idctdsp: Remove obsolete MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Fri Jun 10 20:28:06 2022 +0200| [bfb28b5ce89f3e950214b67ea95b45e3355c2caf] | 
committer: Andreas Rheinhardt

avcodec/x86/idctdsp: Remove obsolete MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bfb28b5ce89f3e950214b67ea95b45e3355c2caf
---

 libavcodec/tests/x86/dct.c |  3 --
 libavcodec/x86/idctdsp.asm | 79 +++---
 libavcodec/x86/idctdsp.h   |  6 
 libavcodec/x86/idctdsp_init.c  | 11 +++---
 libavcodec/x86/simple_idct.asm | 22 ++--
 5 files changed, 10 insertions(+), 111 deletions(-)

diff --git a/libavcodec/tests/x86/dct.c b/libavcodec/tests/x86/dct.c
index 207a2bcb36..ef0662ae37 100644
--- a/libavcodec/tests/x86/dct.c
+++ b/libavcodec/tests/x86/dct.c
@@ -65,9 +65,6 @@ static const struct algo fdct_tab_arch[] = {
 };
 
 static const struct algo idct_tab_arch[] = {
-#if HAVE_MMX_EXTERNAL
-{ "SIMPLE-MMX",  ff_simple_idct_mmx,  FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX 
},
-#endif
 #if CONFIG_MPEG4_DECODER && HAVE_X86ASM
 #if HAVE_SSE2_EXTERNAL
 { "XVID-SSE2",   ff_xvid_idct_sse2,   FF_IDCT_PERM_SSE2,   
AV_CPU_FLAG_SSE2,   1 },
diff --git a/libavcodec/x86/idctdsp.asm b/libavcodec/x86/idctdsp.asm
index 089425a9ab..1cfdb5419d 100644
--- a/libavcodec/x86/idctdsp.asm
+++ b/libavcodec/x86/idctdsp.asm
@@ -37,47 +37,24 @@ SECTION .text
 %macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1
 mova m1, [blockq+mmsize*0+%1]
 mova m2, [blockq+mmsize*2+%1]
-%if mmsize == 8
-mova m3, [blockq+mmsize*4+%1]
-mova m4, [blockq+mmsize*6+%1]
-%endif
 packsswb m1, [blockq+mmsize*1+%1]
 packsswb m2, [blockq+mmsize*3+%1]
-%if mmsize == 8
-packsswb m3, [blockq+mmsize*5+%1]
-packsswb m4, [blockq+mmsize*7+%1]
-%endif
 paddbm1, m0
 paddbm2, m0
-%if mmsize == 8
-paddbm3, m0
-paddbm4, m0
-movq [pixelsq+lsizeq*0], m1
-movq [pixelsq+lsizeq*1], m2
-movq [pixelsq+lsizeq*2], m3
-movq [pixelsq+lsize3q ], m4
-%else
 movq [pixelsq+lsizeq*0], m1
 movhps   [pixelsq+lsizeq*1], m1
 movq [pixelsq+lsizeq*2], m2
 movhps   [pixelsq+lsize3q ], m2
-%endif
 %endmacro
 
-%macro PUT_SIGNED_PIXELS_CLAMPED 1
-cglobal put_signed_pixels_clamped, 3, 4, %1, block, pixels, lsize, lsize3
+INIT_XMM sse2
+cglobal put_signed_pixels_clamped, 3, 4, 3, block, pixels, lsize, lsize3
 mova m0, [pb_80]
 lea  lsize3q, [lsizeq*3]
 PUT_SIGNED_PIXELS_CLAMPED_HALF 0
 lea  pixelsq, [pixelsq+lsizeq*4]
 PUT_SIGNED_PIXELS_CLAMPED_HALF 64
 RET
-%endmacro
-
-INIT_MMX mmx
-PUT_SIGNED_PIXELS_CLAMPED 0
-INIT_XMM sse2
-PUT_SIGNED_PIXELS_CLAMPED 3
 
 ;--
 ; void ff_put_pixels_clamped(const int16_t *block, uint8_t *pixels,
@@ -87,40 +64,21 @@ PUT_SIGNED_PIXELS_CLAMPED 3
 %macro PUT_PIXELS_CLAMPED_HALF 1
 mova m0, [blockq+mmsize*0+%1]
 mova m1, [blockq+mmsize*2+%1]
-%if mmsize == 8
-mova m2, [blockq+mmsize*4+%1]
-mova m3, [blockq+mmsize*6+%1]
-%endif
 packuswb m0, [blockq+mmsize*1+%1]
 packuswb m1, [blockq+mmsize*3+%1]
-%if mmsize == 8
-packuswb m2, [blockq+mmsize*5+%1]
-packuswb m3, [blockq+mmsize*7+%1]
-movq   [pixelsq], m0
-movq[lsizeq+pixelsq], m1
-movq  [2*lsizeq+pixelsq], m2
-movq   [lsize3q+pixelsq], m3
-%else
 movq   [pixelsq], m0
 movhps  [lsizeq+pixelsq], m0
 movq  [2*lsizeq+pixelsq], m1
 movhps [lsize3q+pixelsq], m1
-%endif
 %endmacro
 
-%macro PUT_PIXELS_CLAMPED 0
+INIT_XMM sse2
 cglobal put_pixels_clamped, 3, 4, 2, block, pixels, lsize, lsize3
 lea lsize3q, [lsizeq*3]
 PUT_PIXELS_CLAMPED_HALF 0
 lea pixelsq, [pixelsq+lsizeq*4]
 PUT_PIXELS_CLAMPED_HALF 64
 RET
-%endmacro
-
-INIT_MMX mmx
-PUT_PIXELS_CLAMPED
-INIT_XMM sse2
-PUT_PIXELS_CLAMPED
 
 ;--
 ; void ff_add_pixels_clamped(const int16_t *block, uint8_t *pixels,
@@ -130,41 +88,18 @@ PUT_PIXELS_CLAMPED
 %macro ADD_PIXELS_CLAMPED 1
 mova   m0, [blockq+mmsize*0+%1]
 mova   m1, [blockq+mmsize*1+%1]
-%if mmsize == 8
-mova   m5, [blockq+mmsize*2+%1]
-mova   m6, [blockq+mmsize*3+%1]
-%endif
 movq   m2, [pixelsq]
 movq   m3, [pixelsq+lsizeq]
-%if mmsize == 8
-mova   m7, m2
-punpcklbw  m2, m4
-punpckhbw  m7, m4
-paddsw m0, m2
-paddsw m1, m7
-mova   m7, m3
-punpcklbw  m3, m4
-punpckhbw  m7, m4
-paddsw m5, m3
-paddsw m6, m7
-%else

[FFmpeg-cvslog] avcodec/x86/blockdsp: Remove obsolete MMX functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Fri Jun 10 20:49:50 2022 +0200| [ee551a21ddcbf81afe183d9489c534ee80f263a0] | 
committer: Andreas Rheinhardt

avcodec/x86/blockdsp: Remove obsolete MMX functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s,
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ee551a21ddcbf81afe183d9489c534ee80f263a0
---

 libavcodec/x86/blockdsp.asm| 6 --
 libavcodec/x86/blockdsp_init.c | 7 ---
 2 files changed, 13 deletions(-)

diff --git a/libavcodec/x86/blockdsp.asm b/libavcodec/x86/blockdsp.asm
index 9d203df8f5..e380308d4a 100644
--- a/libavcodec/x86/blockdsp.asm
+++ b/libavcodec/x86/blockdsp.asm
@@ -46,9 +46,6 @@ cglobal clear_block, 1, 1, %1, blocks
 RET
 %endmacro
 
-INIT_MMX mmx
-%define ZERO pxor
-CLEAR_BLOCK 0, 4
 INIT_XMM sse
 %define ZERO xorps
 CLEAR_BLOCK 1, 2
@@ -78,9 +75,6 @@ cglobal clear_blocks, 1, 2, %1, blocks, len
 RET
 %endmacro
 
-INIT_MMX mmx
-%define ZERO pxor
-CLEAR_BLOCKS 0
 INIT_XMM sse
 %define ZERO xorps
 CLEAR_BLOCKS 1
diff --git a/libavcodec/x86/blockdsp_init.c b/libavcodec/x86/blockdsp_init.c
index d7f8a8e508..b0ff9376d9 100644
--- a/libavcodec/x86/blockdsp_init.c
+++ b/libavcodec/x86/blockdsp_init.c
@@ -24,10 +24,8 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/blockdsp.h"
 
-void ff_clear_block_mmx(int16_t *block);
 void ff_clear_block_sse(int16_t *block);
 void ff_clear_block_avx(int16_t *block);
-void ff_clear_blocks_mmx(int16_t *blocks);
 void ff_clear_blocks_sse(int16_t *blocks);
 void ff_clear_blocks_avx(int16_t *blocks);
 
@@ -37,11 +35,6 @@ av_cold void ff_blockdsp_init_x86(BlockDSPContext *c,
 #if HAVE_X86ASM
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_MMX(cpu_flags)) {
-c->clear_block  = ff_clear_block_mmx;
-c->clear_blocks = ff_clear_blocks_mmx;
-}
-
 if (EXTERNAL_SSE(cpu_flags)) {
 c->clear_block  = ff_clear_block_sse;
 c->clear_blocks = ff_clear_blocks_sse;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/pixblockdsp: Remove obsolete MMX functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Fri Jun 10 20:51:41 2022 +0200| [92b58002776edd3a3df03c90e8a3ab24b8f987de] | 
committer: Andreas Rheinhardt

avcodec/x86/pixblockdsp: Remove obsolete MMX functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s,
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=92b58002776edd3a3df03c90e8a3ab24b8f987de
---

 libavcodec/x86/pixblockdsp.asm| 51 +++
 libavcodec/x86/pixblockdsp_init.c | 12 -
 2 files changed, 3 insertions(+), 60 deletions(-)

diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm
index 440fe29bcc..5fdd2914eb 100644
--- a/libavcodec/x86/pixblockdsp.asm
+++ b/libavcodec/x86/pixblockdsp.asm
@@ -25,30 +25,6 @@
 
 SECTION .text
 
-INIT_MMX mmx
-; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t 
stride)
-cglobal get_pixels, 3,4
-add  r0, 128
-mov  r3, -128
-pxor m7, m7
-.loop:
-mova m0, [r1]
-mova m2, [r1+r2]
-mova m1, m0
-mova m3, m2
-punpcklbwm0, m7
-punpckhbwm1, m7
-punpcklbwm2, m7
-punpckhbwm3, m7
-mova [r0+r3+ 0], m0
-mova [r0+r3+ 8], m1
-mova [r0+r3+16], m2
-mova [r0+r3+24], m3
-lea  r1, [r1+r2*2]
-add  r3, 32
-js .loop
-REP_RET
-
 INIT_XMM sse2
 cglobal get_pixels, 3, 4, 5
 lea  r3, [r2*3]
@@ -80,9 +56,9 @@ cglobal get_pixels, 3, 4, 5
 mova  [r0+0x70], m3
 RET
 
-; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-; ptrdiff_t stride);
-%macro DIFF_PIXELS 0
+; void ff_diff_pixels(int16_t *block, const uint8_t *s1, const uint8_t *s2,
+; ptrdiff_t stride);
+INIT_XMM sse2
 cglobal diff_pixels, 4,5,5
 pxor m4, m4
 add  r0,  128
@@ -90,39 +66,18 @@ cglobal diff_pixels, 4,5,5
 .loop:
 movq m0, [r1]
 movq m2, [r2]
-%if mmsize == 8
-movq m1, m0
-movq m3, m2
-punpcklbwm0, m4
-punpckhbwm1, m4
-punpcklbwm2, m4
-punpckhbwm3, m4
-%else
 movq m1, [r1+r3]
 movq m3, [r2+r3]
 punpcklbwm0, m4
 punpcklbwm1, m4
 punpcklbwm2, m4
 punpcklbwm3, m4
-%endif
 psubwm0, m2
 psubwm1, m3
 mova  [r0+r4+0], m0
 mova  [r0+r4+mmsize], m1
-%if mmsize == 8
-add  r1, r3
-add  r2, r3
-%else
 lea  r1, [r1+r3*2]
 lea  r2, [r2+r3*2]
-%endif
 add  r4, 2 * mmsize
 jne .loop
 RET
-%endmacro
-
-INIT_MMX mmx
-DIFF_PIXELS
-
-INIT_XMM sse2
-DIFF_PIXELS
diff --git a/libavcodec/x86/pixblockdsp_init.c 
b/libavcodec/x86/pixblockdsp_init.c
index 3a5eb6959c..51f2a0033a 100644
--- a/libavcodec/x86/pixblockdsp_init.c
+++ b/libavcodec/x86/pixblockdsp_init.c
@@ -23,10 +23,7 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/pixblockdsp.h"
 
-void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t 
stride);
 void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t 
stride);
-void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-ptrdiff_t stride);
 void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
  ptrdiff_t stride);
 
@@ -36,15 +33,6 @@ av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_MMX(cpu_flags)) {
-if (!high_bit_depth) {
-c->get_pixels_unaligned =
-c->get_pixels = ff_get_pixels_mmx;
-}
-c->diff_pixels_unaligned =
-c->diff_pixels = ff_diff_pixels_mmx;
-}
-
 if (EXTERNAL_SSE2(cpu_flags)) {
 if (!high_bit_depth) {
 c->get_pixels_unaligned =

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/lossless_audiodsp: Remove obsolete MMXEXT function

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Fri Jun 10 20:54:45 2022 +0200| [6feea076e98512d78c8d735509ab6b5e9a71ca1c] | 
committer: Andreas Rheinhardt

avcodec/x86/lossless_audiodsp: Remove obsolete MMXEXT function

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from ff_scalarproduct_and_madd_int16_mmxext are truly
ancient 32-bit x86s, it is removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6feea076e98512d78c8d735509ab6b5e9a71ca1c
---

 libavcodec/x86/lossless_audiodsp.asm| 12 +---
 libavcodec/x86/lossless_audiodsp_init.c |  6 --
 2 files changed, 1 insertion(+), 17 deletions(-)

diff --git a/libavcodec/x86/lossless_audiodsp.asm 
b/libavcodec/x86/lossless_audiodsp.asm
index 063d7b41af..ff18eb7081 100644
--- a/libavcodec/x86/lossless_audiodsp.asm
+++ b/libavcodec/x86/lossless_audiodsp.asm
@@ -22,18 +22,14 @@
 
 SECTION .text
 
-%macro SCALARPRODUCT 0
 ; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3,
 ; int order, int mul)
+INIT_XMM sse2
 cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul
 shl orderq, 1
 movdm7, mulm
-%if mmsize == 16
 pshuflw m7, m7, 0
 punpcklqdq m7, m7
-%else
-pshufw  m7, m7, 0
-%endif
 pxorm6, m6
 add v1q, orderq
 add v2q, orderq
@@ -61,12 +57,6 @@ cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, 
order, mul
 HADDD   m6, m0
 movd   eax, m6
 RET
-%endmacro
-
-INIT_MMX mmxext
-SCALARPRODUCT
-INIT_XMM sse2
-SCALARPRODUCT
 
 INIT_XMM sse4
 ; int ff_scalarproduct_and_madd_int32(int16_t *v1, int32_t *v2, int16_t *v3,
diff --git a/libavcodec/x86/lossless_audiodsp_init.c 
b/libavcodec/x86/lossless_audiodsp_init.c
index f74c7e4361..462329db32 100644
--- a/libavcodec/x86/lossless_audiodsp_init.c
+++ b/libavcodec/x86/lossless_audiodsp_init.c
@@ -21,9 +21,6 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/lossless_audiodsp.h"
 
-int32_t ff_scalarproduct_and_madd_int16_mmxext(int16_t *v1, const int16_t *v2,
-   const int16_t *v3,
-   int order, int mul);
 int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2,
  const int16_t *v3,
  int order, int mul);
@@ -40,9 +37,6 @@ av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
 #if HAVE_X86ASM
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_MMXEXT(cpu_flags))
-c->scalarproduct_and_madd_int16 = 
ff_scalarproduct_and_madd_int16_mmxext;
-
 if (EXTERNAL_SSE2(cpu_flags))
 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/svq1enc: Remove obsolete MMXEXT function

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Fri Jun 10 20:58:35 2022 +0200| [9426a2f8ff4607b7293e6140e56b8cc44e629dbd] | 
committer: Andreas Rheinhardt

avcodec/x86/svq1enc: Remove obsolete MMXEXT function

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from ff_ssd_int8_vs_int16_mmx are truly ancient
32-bit x86s, it is removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9426a2f8ff4607b7293e6140e56b8cc44e629dbd
---

 libavcodec/x86/svq1enc.asm| 22 +-
 libavcodec/x86/svq1enc_init.c |  5 -
 2 files changed, 1 insertion(+), 26 deletions(-)

diff --git a/libavcodec/x86/svq1enc.asm b/libavcodec/x86/svq1enc.asm
index a87632836d..123e86ba24 100644
--- a/libavcodec/x86/svq1enc.asm
+++ b/libavcodec/x86/svq1enc.asm
@@ -23,39 +23,19 @@
 
 SECTION .text
 
-%macro SSD_INT8_VS_INT16 0
+INIT_XMM sse2
 cglobal ssd_int8_vs_int16, 3, 3, 3, pix1, pix2, size
 pxor m0, m0
 .loop:
 sub   sizeq, 8
 movq  m1, [pix1q + sizeq]
 mova  m2, [pix2q + sizeq*2]
-%if mmsize == 8
-movq  m3, [pix2q + sizeq*2 + mmsize]
-punpckhbw m4, m1
-punpcklbw m1, m1
-psraw m4, 8
-psraw m1, 8
-psubw m3, m4
-psubw m2, m1
-pmaddwd   m3, m3
-pmaddwd   m2, m2
-paddd m0, m3
-paddd m0, m2
-%else
 punpcklbw m1, m1
 psraw m1, 8
 psubw m2, m1
 pmaddwd   m2, m2
 paddd m0, m2
-%endif
 jg .loop
 HADDD m0, m1
 movd eax, m0
 RET
-%endmacro
-
-INIT_MMX mmx
-SSD_INT8_VS_INT16
-INIT_XMM sse2
-SSD_INT8_VS_INT16
diff --git a/libavcodec/x86/svq1enc_init.c b/libavcodec/x86/svq1enc_init.c
index 40b4b0e183..787a5245f3 100644
--- a/libavcodec/x86/svq1enc_init.c
+++ b/libavcodec/x86/svq1enc_init.c
@@ -24,8 +24,6 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/svq1enc.h"
 
-int ff_ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
- intptr_t size);
 int ff_ssd_int8_vs_int16_sse2(const int8_t *pix1, const int16_t *pix2,
   intptr_t size);
 
@@ -33,9 +31,6 @@ av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_MMX(cpu_flags)) {
-c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_mmx;
-}
 if (EXTERNAL_SSE2(cpu_flags)) {
 c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_sse2;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/fmtconvert: Remove obsolete SSE functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Fri Jun 10 21:00:55 2022 +0200| [4038b5b209cd8a0a3cb559d1073e9b22196a8ace] | 
committer: Andreas Rheinhardt

avcodec/x86/fmtconvert: Remove obsolete SSE functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s,
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4038b5b209cd8a0a3cb559d1073e9b22196a8ace
---

 libavcodec/x86/fmtconvert.asm| 36 
 libavcodec/x86/fmtconvert_init.c |  7 ---
 2 files changed, 43 deletions(-)

diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 8f62a0a093..e70df4662d 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -44,35 +44,17 @@ cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, 
mul, len
 add dstq, lenq
 neg lenq
 .loop:
-%if cpuflag(sse2)
 cvtdq2ps  m1, [srcq+lenq   ]
 cvtdq2ps  m2, [srcq+lenq+16]
-%else
-cvtpi2ps  m1, [srcq+lenq   ]
-cvtpi2ps  m3, [srcq+lenq+ 8]
-cvtpi2ps  m2, [srcq+lenq+16]
-cvtpi2ps  m4, [srcq+lenq+24]
-movlhps   m1, m3
-movlhps   m2, m4
-%endif
 mulps m1, m0
 mulps m2, m0
 mova  [dstq+lenq   ], m1
 mova  [dstq+lenq+16], m2
 add lenq, 32
 jl .loop
-%if notcpuflag(sse2)
-;; cvtpi2ps switches to MMX even if the source is a memory location
-;; possible an error in documentation since every tested CPU disagrees with
-;; that. Use emms anyway since the vast majority of machines will use the
-;; SSE2 variant
-emms
-%endif
 RET
 %endmacro
 
-INIT_XMM sse
-INT32_TO_FLOAT_FMUL_SCALAR 5
 INIT_XMM sse2
 INT32_TO_FLOAT_FMUL_SCALAR 3
 
@@ -89,17 +71,8 @@ cglobal int32_to_float_fmul_array8, 5, 5, 5, c, dst, src, 
mul, len
 .loop:
 movss m0, [mulq]
 SPLATDm0
-%if cpuflag(sse2)
 cvtdq2ps  m1, [srcq+lenq   ]
 cvtdq2ps  m2, [srcq+lenq+16]
-%else
-cvtpi2ps  m1, [srcq+lenq   ]
-cvtpi2ps  m3, [srcq+lenq+ 8]
-cvtpi2ps  m2, [srcq+lenq+16]
-cvtpi2ps  m4, [srcq+lenq+24]
-movlhps   m1, m3
-movlhps   m2, m4
-%endif
 mulps m1, m0
 mulps m2, m0
 mova  [dstq+lenq   ], m1
@@ -107,18 +80,9 @@ cglobal int32_to_float_fmul_array8, 5, 5, 5, c, dst, src, 
mul, len
 add mulq, 4
 add lenq, 32
 jl .loop
-%if notcpuflag(sse2)
-;; cvtpi2ps switches to MMX even if the source is a memory location
-;; possible an error in documentation since every tested CPU disagrees with
-;; that. Use emms anyway since the vast majority of machines will use the
-;; SSE2 variant
-emms
-%endif
 RET
 %endmacro
 
-INIT_XMM sse
-INT32_TO_FLOAT_FMUL_ARRAY8
 INIT_XMM sse2
 INT32_TO_FLOAT_FMUL_ARRAY8
 
diff --git a/libavcodec/x86/fmtconvert_init.c b/libavcodec/x86/fmtconvert_init.c
index df097054e4..58b396856e 100644
--- a/libavcodec/x86/fmtconvert_init.c
+++ b/libavcodec/x86/fmtconvert_init.c
@@ -29,10 +29,7 @@
 
 #if HAVE_X86ASM
 
-void ff_int32_to_float_fmul_scalar_sse (float *dst, const int32_t *src, float 
mul, int len);
 void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int32_t *src, float 
mul, int len);
-void ff_int32_to_float_fmul_array8_sse (FmtConvertContext *c, float *dst, 
const int32_t *src,
-const float *mul, int len);
 void ff_int32_to_float_fmul_array8_sse2(FmtConvertContext *c, float *dst, 
const int32_t *src,
 const float *mul, int len);
 
@@ -43,10 +40,6 @@ av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, 
AVCodecContext *avctx
 #if HAVE_X86ASM
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_SSE(cpu_flags)) {
-c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
-c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_sse;
-}
 if (EXTERNAL_SSE2(cpu_flags)) {
 c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
 c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_sse2;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/hpeldsp_vp3: Remove obsolete 3dnow functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Fri Jun 10 21:18:52 2022 +0200| [aa8a2019928ae6ff6ec4382020b26b891f64d4bd] | 
committer: Andreas Rheinhardt

avcodec/x86/hpeldsp_vp3: Remove obsolete 3dnow functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s,
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=aa8a2019928ae6ff6ec4382020b26b891f64d4bd
---

 libavcodec/x86/hpeldsp_vp3.asm| 16 ++--
 libavcodec/x86/hpeldsp_vp3_init.c | 13 -
 2 files changed, 2 insertions(+), 27 deletions(-)

diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm
index cba96d06cb..88ca8e8e0a 100644
--- a/libavcodec/x86/hpeldsp_vp3.asm
+++ b/libavcodec/x86/hpeldsp_vp3.asm
@@ -23,7 +23,7 @@
 SECTION .text
 
 ; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, 
ptrdiff_t line_size, int h)
-%macro PUT_NO_RND_PIXELS8_X2_EXACT 0
+INIT_MMX mmxext
 cglobal put_no_rnd_pixels8_x2_exact, 4,5
 lea  r4, [r2*3]
 pcmpeqb  m6, m6
@@ -61,16 +61,10 @@ cglobal put_no_rnd_pixels8_x2_exact, 4,5
 sub r3d, 4
 jg .loop
 REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PUT_NO_RND_PIXELS8_X2_EXACT
-INIT_MMX 3dnow
-PUT_NO_RND_PIXELS8_X2_EXACT
 
 
 ; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, 
ptrdiff_t line_size, int h)
-%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
+INIT_MMX mmxext
 cglobal put_no_rnd_pixels8_y2_exact, 4,5
 lea  r4, [r2*3]
 mova m0, [r1]
@@ -103,9 +97,3 @@ cglobal put_no_rnd_pixels8_y2_exact, 4,5
 sub r3d, 4
 jg .loop
 REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PUT_NO_RND_PIXELS8_Y2_EXACT
-INIT_MMX 3dnow
-PUT_NO_RND_PIXELS8_Y2_EXACT
diff --git a/libavcodec/x86/hpeldsp_vp3_init.c 
b/libavcodec/x86/hpeldsp_vp3_init.c
index 5979f4123c..1dbd1ba6f9 100644
--- a/libavcodec/x86/hpeldsp_vp3_init.c
+++ b/libavcodec/x86/hpeldsp_vp3_init.c
@@ -28,25 +28,12 @@
 void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
const uint8_t *pixels,
ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block,
-  const uint8_t *pixels,
-  ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
const uint8_t *pixels,
ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block,
-  const uint8_t *pixels,
-  ptrdiff_t line_size, int h);
 
 av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int 
flags)
 {
-if (EXTERNAL_AMD3DNOW(cpu_flags)) {
-if (flags & AV_CODEC_FLAG_BITEXACT) {
-c->put_no_rnd_pixels_tab[1][1] = 
ff_put_no_rnd_pixels8_x2_exact_3dnow;
-c->put_no_rnd_pixels_tab[1][2] = 
ff_put_no_rnd_pixels8_y2_exact_3dnow;
-}
-}
-
 if (EXTERNAL_MMXEXT(cpu_flags)) {
 if (flags & AV_CODEC_FLAG_BITEXACT) {
 c->put_no_rnd_pixels_tab[1][1] = 
ff_put_no_rnd_pixels8_x2_exact_mmxext;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/h264chroma: Remove obsolete 3dnow functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Sat Jun 11 00:16:31 2022 +0200| [67f0db7bc5c42cc861add0c660d438f6bdc4ccfe] | 
committer: Andreas Rheinhardt

avcodec/x86/h264chroma: Remove obsolete 3dnow functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s,
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=67f0db7bc5c42cc861add0c660d438f6bdc4ccfe
---

 libavcodec/x86/h264_chromamc.asm | 4 
 libavcodec/x86/h264chroma_init.c | 9 -
 2 files changed, 13 deletions(-)

diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index ba6f4af3b0..e562efd69d 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -446,10 +446,6 @@ chroma_mc4_mmx_func avg, h264
 chroma_mc4_mmx_func avg, rv40
 chroma_mc2_mmx_func avg, h264
 
-INIT_MMX 3dnow
-chroma_mc8_mmx_func avg, h264, _rnd
-chroma_mc4_mmx_func avg, h264
-
 %macro chroma_mc8_ssse3_func 2-3
 cglobal %1_%2_chroma_mc8%3, 6, 7, 8
 mov  r6d, r5d
diff --git a/libavcodec/x86/h264chroma_init.c b/libavcodec/x86/h264chroma_init.c
index 36bf29df02..7c0f492178 100644
--- a/libavcodec/x86/h264chroma_init.c
+++ b/libavcodec/x86/h264chroma_init.c
@@ -28,15 +28,11 @@ void ff_put_h264_chroma_mc8_rnd_mmx  (uint8_t *dst, uint8_t 
*src,
   ptrdiff_t stride, int h, int x, int y);
 void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src,
ptrdiff_t stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src,
-  ptrdiff_t stride, int h, int x, int y);
 
 void ff_put_h264_chroma_mc4_mmx  (uint8_t *dst, uint8_t *src,
   ptrdiff_t stride, int h, int x, int y);
 void ff_avg_h264_chroma_mc4_mmxext   (uint8_t *dst, uint8_t *src,
   ptrdiff_t stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src,
-  ptrdiff_t stride, int h, int x, int y);
 
 void ff_put_h264_chroma_mc2_mmxext   (uint8_t *dst, uint8_t *src,
   ptrdiff_t stride, int h, int x, int y);
@@ -77,11 +73,6 @@ av_cold void ff_h264chroma_init_x86(H264ChromaContext *c, 
int bit_depth)
 c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
 }
 
-if (EXTERNAL_AMD3DNOW(cpu_flags) && !high_bit_depth) {
-c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
-c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
-}
-
 if (EXTERNAL_MMXEXT(cpu_flags) && !high_bit_depth) {
 c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
 c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/h264_qpel: Remove obsolete MMXEXT functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Thu Jun  9 03:32:03 2022 +0200| [4011a76494a5ff6844312813bc753aae8e54c2f0] | 
committer: Andreas Rheinhardt

avcodec/x86/h264_qpel: Remove obsolete MMXEXT functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s,
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4011a76494a5ff6844312813bc753aae8e54c2f0
---

 libavcodec/x86/h264_qpel.c | 116 -
 libavcodec/x86/h264_qpel_10bit.asm |   2 +-
 libavcodec/x86/h264_qpel_8bit.asm  |   7 ---
 3 files changed, 39 insertions(+), 86 deletions(-)

diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c
index dda50ded89..5aa12ff81f 100644
--- a/libavcodec/x86/h264_qpel.c
+++ b/libavcodec/x86/h264_qpel.c
@@ -236,7 +236,11 @@ static av_always_inline void ff_ ## OPNAME ## 
h264_qpel16_hv_lowpass_ ## MMX(uin
 #define ff_put_h264_qpel8or16_hv2_lowpass_sse2 
ff_put_h264_qpel8or16_hv2_lowpass_mmxext
 #define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 
ff_avg_h264_qpel8or16_hv2_lowpass_mmxext
 
-#define H264_MC(OPNAME, SIZE, MMX, ALIGN) \
+#define H264_MC_C_H(OPNAME, SIZE, MMX, ALIGN) \
+H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
+H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
+
+#define H264_MC_C_V_H_HV(OPNAME, SIZE, MMX, ALIGN) \
 H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
 H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\
 H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
@@ -372,13 +376,9 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## 
MMX(uint8_t *dst, const uin
 ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, 
stride, SIZE, SIZE);\
 }\
 
-#define H264_MC_4816(MMX)\
-H264_MC(put_, 4, MMX, 8)\
-H264_MC(put_, 8, MMX, 8)\
-H264_MC(put_, 16,MMX, 8)\
-H264_MC(avg_, 4, MMX, 8)\
-H264_MC(avg_, 8, MMX, 8)\
-H264_MC(avg_, 16,MMX, 8)\
+#define H264_MC(QPEL, SIZE, MMX, ALIGN)\
+QPEL(put_, SIZE, MMX, ALIGN) \
+QPEL(avg_, SIZE, MMX, ALIGN) \
 
 #define H264_MC_816(QPEL, XMM)\
 QPEL(put_, 8, XMM, 16)\
@@ -397,7 +397,9 @@ QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3)
 QPEL_H264_HV_XMM(put_,   PUT_OP, ssse3)
 QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
 
-H264_MC_4816(mmxext)
+H264_MC(H264_MC_C_V_H_HV, 4, mmxext, 8)
+H264_MC(H264_MC_C_H, 8, mmxext, 8)
+H264_MC(H264_MC_C_H, 16, mmxext, 8)
 H264_MC_816(H264_MC_V, sse2)
 H264_MC_816(H264_MC_HV, sse2)
 H264_MC_816(H264_MC_H, ssse3)
@@ -409,13 +411,9 @@ H264_MC_816(H264_MC_HV, ssse3)
 void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
 (uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
 
-#define LUMA_MC_ALL(DEPTH, TYPE, OPT) \
+#define LUMA_MC_4(DEPTH, TYPE, OPT) \
 LUMA_MC_OP(put,  4, DEPTH, TYPE, OPT) \
-LUMA_MC_OP(avg,  4, DEPTH, TYPE, OPT) \
-LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT) \
-LUMA_MC_OP(avg,  8, DEPTH, TYPE, OPT) \
-LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
-LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
+LUMA_MC_OP(avg,  4, DEPTH, TYPE, OPT)
 
 #define LUMA_MC_816(DEPTH, TYPE, OPT) \
 LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT) \
@@ -423,22 +421,22 @@ void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## 
DEPTH ## _ ## OPT \
 LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
 LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
 
-LUMA_MC_ALL(10, mc00, mmxext)
-LUMA_MC_ALL(10, mc10, mmxext)
-LUMA_MC_ALL(10, mc20, mmxext)
-LUMA_MC_ALL(10, mc30, mmxext)
-LUMA_MC_ALL(10, mc01, mmxext)
-LUMA_MC_ALL(10, mc11, mmxext)
-LUMA_MC_ALL(10, mc21, mmxext)
-LUMA_MC_ALL(10, mc31, mmxext)
-LUMA_MC_ALL(10, mc02, mmxext)
-LUMA_MC_ALL(10, mc12, mmxext)
-LUMA_MC_ALL(10, mc22, mmxext)
-LUMA_MC_ALL(10, mc32, mmxext)
-LUMA_MC_ALL(10, mc03, mmxext)
-LUMA_MC_ALL(10, mc13, mmxext)
-LUMA_MC_ALL(10, mc23, mmxext)
-LUMA_MC_ALL(10, mc33, mmxext)
+LUMA_MC_4(10, mc00, mmxext)
+LUMA_MC_4(10, mc10, mmxext)
+LUMA_MC_4(10, mc20, mmxext)
+LUMA_MC_4(10, mc30, mmxext)
+LUMA_MC_4(10, mc01, mmxext)
+LUMA_MC_4(10, mc11, mmxext)
+LUMA_MC_4(10, mc21, mmxext)
+LUMA_MC_4(10, mc31, mmxext)
+LUMA_MC_4(10, mc02, mmxext)
+LUMA_MC_4(10, mc12, mmxext)
+LUMA_MC_4(10, mc22, mmxext)
+LUMA_MC_4(10, mc32, mmxext)
+LUMA_MC_4(10, mc03, mmxext)
+LUMA_MC_4(10, mc13, mmxext)
+LUMA_MC_4(10, mc23, mmxext)
+LUMA_MC_4(10, mc33, mmxext)
 
 LUMA_MC_816(10, mc00, sse2)
 LUMA_MC_816(10, mc10, sse2)
@@ -463,50 +461,18 @@ LUMA_MC_816(10, mc13, sse2)
 LUMA_MC_816(10, mc23, sse2)
 LUMA_MC_816(10, mc33, sse2)
 
-#define QPEL16_OPMC(OP, MC, MMX)\
-void ff_ ## OP ## _h264_qpel16_ ## MC ## _10_ ## MMX(uint8_t *dst, const 
uint8_t *src, ptrdiff_t stride){\
-ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst   , src   , stride);\
-ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
-src += 8*stride;\
-dst += 8*stride;\
-ff_ ## OP ## _h264_qp

[FFmpeg-cvslog] swscale/x86/rgb2rgb: Remove obsolete MMX, 3dnow functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Sat Jun 11 01:07:57 2022 +0200| [608319a311a31f7d85333a7b08286c00be38eab6] | 
committer: Andreas Rheinhardt

swscale/x86/rgb2rgb: Remove obsolete MMX, 3dnow functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s,
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=608319a311a31f7d85333a7b08286c00be38eab6
---

 libswscale/x86/rgb2rgb.c  |  26 
 libswscale/x86/rgb2rgb_template.c | 123 +-
 2 files changed, 15 insertions(+), 134 deletions(-)

diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index 0ab139aca4..b325e5dbd5 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -85,20 +85,11 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset);
 
 // Note: We have C, MMX, MMXEXT, 3DNOW versions, there is no 3DNOW + MMXEXT 
one.
 
-#define COMPILE_TEMPLATE_MMXEXT 0
-#define COMPILE_TEMPLATE_AMD3DNOW 0
 #define COMPILE_TEMPLATE_SSE2 0
 #define COMPILE_TEMPLATE_AVX 0
 
-//MMX versions
-#undef RENAME
-#define RENAME(a) a ## _mmx
-#include "rgb2rgb_template.c"
-
 // MMXEXT versions
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#define COMPILE_TEMPLATE_MMXEXT 1
 #define RENAME(a) a ## _mmxext
 #include "rgb2rgb_template.c"
 
@@ -116,19 +107,6 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset);
 #define RENAME(a) a ## _avx
 #include "rgb2rgb_template.c"
 
-//3DNOW versions
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#undef COMPILE_TEMPLATE_SSE2
-#undef COMPILE_TEMPLATE_AVX
-#undef COMPILE_TEMPLATE_AMD3DNOW
-#define COMPILE_TEMPLATE_MMXEXT 0
-#define COMPILE_TEMPLATE_SSE2 0
-#define COMPILE_TEMPLATE_AVX 0
-#define COMPILE_TEMPLATE_AMD3DNOW 1
-#define RENAME(a) a ## _3dnow
-#include "rgb2rgb_template.c"
-
 /*
  RGB15->RGB16 original by Strepto/Astral
  ported to gcc & bugfixed : A'rpi
@@ -165,10 +143,6 @@ av_cold void rgb2rgb_init_x86(void)
 int cpu_flags = av_get_cpu_flags();
 
 #if HAVE_INLINE_ASM
-if (INLINE_MMX(cpu_flags))
-rgb2rgb_init_mmx();
-if (INLINE_AMD3DNOW(cpu_flags))
-rgb2rgb_init_3dnow();
 if (INLINE_MMXEXT(cpu_flags))
 rgb2rgb_init_mmxext();
 if (INLINE_SSE2(cpu_flags))
diff --git a/libswscale/x86/rgb2rgb_template.c 
b/libswscale/x86/rgb2rgb_template.c
index ae2469e663..4aba25dd51 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -36,34 +36,14 @@
 #undef SFENCE
 #undef PAVGB
 
-#if COMPILE_TEMPLATE_AMD3DNOW
-#define PREFETCH  "prefetch"
-#define PAVGB "pavgusb"
-#elif COMPILE_TEMPLATE_MMXEXT
 #define PREFETCH "prefetchnta"
 #define PAVGB "pavgb"
-#else
-#define PREFETCH  " # nop"
-#endif
-
-#if COMPILE_TEMPLATE_AMD3DNOW
-/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
-#define EMMS "femms"
-#else
-#define EMMS "emms"
-#endif
-
-#if COMPILE_TEMPLATE_MMXEXT
 #define MOVNTQ "movntq"
 #define SFENCE "sfence"
-#else
-#define MOVNTQ "movq"
-#define SFENCE " # nop"
-#endif
 
-#if !COMPILE_TEMPLATE_SSE2
+#define EMMS "emms"
 
-#if !COMPILE_TEMPLATE_AMD3DNOW
+#if !COMPILE_TEMPLATE_SSE2
 
 static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int 
src_size)
 {
@@ -1353,9 +1333,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, 
uint8_t *ydst, uint8_t
  SFENCE" \n\t"
  :::"memory");
 }
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
-#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int 
srcWidth, int srcHeight, int srcStride, int dstStride)
 {
 int x,y;
@@ -1453,9 +1431,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, 
uint8_t *dst, int srcWid
  SFENCE" \n\t"
  :::"memory");
 }
-#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
 
-#if !COMPILE_TEMPLATE_AMD3DNOW
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)
@@ -1559,7 +1535,6 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, 
uint8_t *ydst, uint8_t
  SFENCE" \n\t"
  :::"memory");
 }
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 2.
@@ -1673,7 +1648,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t 
*src, uint8_t *ydst, uint8_
 "1: \n\t"
 PREFETCH" 64(%0, %%"FF_REG_d")  \n\t"
 PREFETCH" 64(%1, %%"FF_REG_d")  \n\t"
-#if COMPIL

[FFmpeg-cvslog] swscale/x86/yuv2rgb: Remove obsolete MMX functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Sat Jun 11 01:13:22 2022 +0200| [2831837182fe26f0a19a4d366f3f0553311f1291] | 
committer: Andreas Rheinhardt

swscale/x86/yuv2rgb: Remove obsolete MMX functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2831837182fe26f0a19a4d366f3f0553311f1291
---

 libswscale/x86/yuv2rgb.c  | 15 +--
 libswscale/x86/yuv2rgb_template.c |  5 +++--
 libswscale/x86/yuv_2_rgb.asm  |  5 +++--
 3 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index 47f45bd7c2..6754062245 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -44,23 +44,22 @@
 //MMX versions
 #if HAVE_MMX
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#define COMPILE_TEMPLATE_MMXEXT 0
+#define COMPILE_TEMPLATE_MMX
 #define RENAME(a) a ## _mmx
 #include "yuv2rgb_template.c"
+#undef COMPILE_TEMPLATE_MMX
 #endif /* HAVE_MMX */
 
 // MMXEXT versions
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#define COMPILE_TEMPLATE_MMXEXT 1
+#define COMPILE_TEMPLATE_MMXEXT
 #define RENAME(a) a ## _mmxext
 #include "yuv2rgb_template.c"
+#undef COMPILE_TEMPLATE_MMXEXT
 
 //SSSE3 versions
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#define COMPILE_TEMPLATE_MMXEXT 0
+#define COMPILE_TEMPLATE_SSSE3
 #define RENAME(a) a ## _ssse3
 #include "yuv2rgb_template.c"
 
@@ -127,10 +126,6 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 break;
 } else
 return yuv420_bgr32_mmx;
-case AV_PIX_FMT_RGB24:
-return yuv420_rgb24_mmx;
-case AV_PIX_FMT_BGR24:
-return yuv420_bgr24_mmx;
 case AV_PIX_FMT_RGB565:
 return yuv420_rgb16_mmx;
 case AV_PIX_FMT_RGB555:
diff --git a/libswscale/x86/yuv2rgb_template.c 
b/libswscale/x86/yuv2rgb_template.c
index d506f75e15..596943bb73 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -47,7 +47,7 @@ extern void RENAME(ff_yuv_420_bgr24)(x86_reg index, uint8_t 
*image, const uint8_
  const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
  const uint8_t *py_2index);
 
-#if !COMPILE_TEMPLATE_MMXEXT
+#ifndef COMPILE_TEMPLATE_MMXEXT
 extern void RENAME(ff_yuv_420_rgb15)(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
  const uint8_t *pv_index, const uint64_t 
*pointer_c_dither,
  const uint8_t *py_2index);
@@ -165,6 +165,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, 
const uint8_t *src[],
 }
 #endif
 
+#if !defined(COMPILE_TEMPLATE_MMX)
 static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[],
int srcStride[],
int srcSliceY, int srcSliceH,
@@ -192,4 +193,4 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const 
uint8_t *src[],
 }
 return srcSliceH;
 }
-
+#endif
diff --git a/libswscale/x86/yuv_2_rgb.asm b/libswscale/x86/yuv_2_rgb.asm
index f968b3a0a2..c5fa3ee690 100644
--- a/libswscale/x86/yuv_2_rgb.asm
+++ b/libswscale/x86/yuv_2_rgb.asm
@@ -69,6 +69,9 @@ SECTION .text
 %ifidn %1, yuva
 %define parameters index, image, pu_index, pv_index, pointer_c_dither, 
py_2index, pa_2index
 %define GPR_num 7
+%else
+%define parameters index, image, pu_index, pv_index, pointer_c_dither, 
py_2index
+%define GPR_num 6
 %endif
 %else
 %define parameters index, image, pu_index, pv_index, pointer_c_dither, 
py_2index
@@ -356,8 +359,6 @@ REP_RET
 %endmacro
 
 INIT_MMX mmx
-yuv2rgb_fn yuv,  rgb, 24
-yuv2rgb_fn yuv,  bgr, 24
 yuv2rgb_fn yuv,  rgb, 32
 yuv2rgb_fn yuv,  bgr, 32
 yuv2rgb_fn yuva, rgb, 32

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/hpeldsp: Remove obsolete MMX/3dnow functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Fri Jun 10 22:42:01 2022 +0200| [a51279bbdea0d6db920d71980262bccd0ce78226] | 
committer: Andreas Rheinhardt

avcodec/x86/hpeldsp: Remove obsolete MMX/3dnow functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a51279bbdea0d6db920d71980262bccd0ce78226
---

 libavcodec/x86/fpel.asm   |   1 -
 libavcodec/x86/hpeldsp.asm|  72 ++--
 libavcodec/x86/hpeldsp.h  |   2 -
 libavcodec/x86/hpeldsp_init.c | 108 +++---
 libavcodec/x86/rnd_template.c |   2 +
 5 files changed, 22 insertions(+), 163 deletions(-)

diff --git a/libavcodec/x86/fpel.asm b/libavcodec/x86/fpel.asm
index d38a1b1035..ebe8e43750 100644
--- a/libavcodec/x86/fpel.asm
+++ b/libavcodec/x86/fpel.asm
@@ -91,7 +91,6 @@ cglobal %1_pixels%2, 4,5,4
 INIT_MMX mmx
 OP_PIXELS put, 4
 OP_PIXELS put, 8
-OP_PIXELS avg, 8
 OP_PIXELS put, 16
 OP_PIXELS avg, 16
 
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index ce5d7a4e28..b3a270a173 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -83,8 +83,6 @@ cglobal put_pixels8_x2, 4,5
 
 INIT_MMX mmxext
 PUT_PIXELS8_X2
-INIT_MMX 3dnow
-PUT_PIXELS8_X2
 
 
 ; void ff_put_pixels16_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t 
line_size, int h)
@@ -127,15 +125,13 @@ cglobal put_pixels16_x2, 4,5
 
 INIT_MMX mmxext
 PUT_PIXELS_16
-INIT_MMX 3dnow
-PUT_PIXELS_16
 ; The 8_X2 macro can easily be used here
 INIT_XMM sse2
 PUT_PIXELS8_X2
 
 
 ; void ff_put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, 
ptrdiff_t line_size, int h)
-%macro PUT_NO_RND_PIXELS8_X2 0
+INIT_MMX mmxext
 cglobal put_no_rnd_pixels8_x2, 4,5
 mova m6, [pb_1]
 lea  r4, [r2*2]
@@ -167,12 +163,6 @@ cglobal put_no_rnd_pixels8_x2, 4,5
 sub r3d, 4
 jne .loop
 REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PUT_NO_RND_PIXELS8_X2
-INIT_MMX 3dnow
-PUT_NO_RND_PIXELS8_X2
 
 
 ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t 
line_size, int h)
@@ -209,15 +199,13 @@ cglobal put_pixels8_y2, 4,5
 
 INIT_MMX mmxext
 PUT_PIXELS8_Y2
-INIT_MMX 3dnow
-PUT_PIXELS8_Y2
 ; actually, put_pixels16_y2_sse2
 INIT_XMM sse2
 PUT_PIXELS8_Y2
 
 
 ; void ff_put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, 
ptrdiff_t line_size, int h)
-%macro PUT_NO_RND_PIXELS8_Y2 0
+INIT_MMX mmxext
 cglobal put_no_rnd_pixels8_y2, 4,5
 mova m6, [pb_1]
 lea  r4, [r2+r2]
@@ -245,42 +233,6 @@ cglobal put_no_rnd_pixels8_y2, 4,5
 sub r3d, 4
 jne .loop
 REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PUT_NO_RND_PIXELS8_Y2
-INIT_MMX 3dnow
-PUT_NO_RND_PIXELS8_Y2
-
-
-; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t 
line_size, int h)
-%macro AVG_PIXELS8 0
-cglobal avg_pixels8, 4,5
-lea  r4, [r2*2]
-.loop:
-mova m0, [r0]
-mova m1, [r0+r2]
-PAVGBm0, [r1]
-PAVGBm1, [r1+r2]
-mova   [r0], m0
-mova[r0+r2], m1
-add  r1, r4
-add  r0, r4
-mova m0, [r0]
-mova m1, [r0+r2]
-PAVGBm0, [r1]
-PAVGBm1, [r1+r2]
-add  r1, r4
-mova   [r0], m0
-mova[r0+r2], m1
-add  r0, r4
-sub r3d, 4
-jne .loop
-REP_RET
-%endmacro
-
-INIT_MMX 3dnow
-AVG_PIXELS8
 
 
 ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t 
line_size, int h)
@@ -291,10 +243,6 @@ cglobal avg_pixels16_x2, 4,5,4
 cglobal avg_pixels8_x2, 4,5
 %endif
 lea  r4, [r2*2]
-%if notcpuflag(mmxext)
-pcmpeqd  m5, m5
-paddbm5, m5
-%endif
 .loop:
 movu m0, [r1]
 movu m2, [r1+r2]
@@ -335,12 +283,8 @@ cglobal avg_pixels8_x2, 4,5
 REP_RET
 %endmacro
 
-INIT_MMX mmx
-AVG_PIXELS8_X2
 INIT_MMX mmxext
 AVG_PIXELS8_X2
-INIT_MMX 3dnow
-AVG_PIXELS8_X2
 ; actually avg_pixels16_x2
 INIT_XMM sse2
 AVG_PIXELS8_X2
@@ -384,8 +328,6 @@ cglobal avg_pixels8_y2, 4,5
 
 INIT_MMX mmxext
 AVG_PIXELS8_Y2
-INIT_MMX 3dnow
-AVG_PIXELS8_Y2
 ; actually avg_pixels16_y2
 INIT_XMM sse2
 AVG_PIXELS8_Y2
@@ -394,7 +336,7 @@ AVG_PIXELS8_Y2
 ; void ff_avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t 
line_size, int h)
 ; Note this is not correctly rounded, and is therefore used for
 ; not-bitexact output
-%macro AVG_APPROX_PIXELS8_XY2 0
+INIT_MMX mmxext
 cglobal avg_approx_pixels8_xy2, 4,5
 mova m6, [pb_1]
 lea  r4, [r2*2]
@@ -429,12 +371,6 @@ cglobal avg_approx_pixels8_xy2, 4,5
 sub r3d, 4
 jne .loop
 REP_RET
-%endmacro
-
-

[FFmpeg-cvslog] swscale/x86/swscale: Remove obsolete and harmful MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Thu Jun  9 16:57:34 2022 +0200| [a05f22eaf393177b94432431c145cbc5ba10390a] | 
committer: Andreas Rheinhardt

swscale/x86/swscale: Remove obsolete and harmful MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT, SSE and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2). So given that the only systems that
benefit from these functions are truly ancient 32bit x86s
they are removed.

Moreover, some of the removed code was buggy/not bitexact
and led to failures involving the f32le and f32be versions of
gray, gbrp and gbrap on x86-32 when SSE2 was not disabled.
See e.g.
https://fate.ffmpeg.org/report.cgi?time=20220609221253&slot=x86_32-debian-kfreebsd-gcc-4.4-cpuflags-mmx

Notice that yuv2yuvX_mmx is not removed, because it is used
by SSE3 and AVX2 as fallback in case of unaligned data and
also for tail processing. I don't know why yuv2yuvX_mmxext
isn't being used for this; an earlier version [1] of
554c2bc7086f49ef5a6a989ad6bc4bc11807eb6f used it, but
the version that was eventually applied does not.

[1]: https://ffmpeg.org/pipermail/ffmpeg-devel/2020-November/272124.html

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a05f22eaf393177b94432431c145cbc5ba10390a
---

 libswscale/x86/input.asm  | 82 +-
 libswscale/x86/output.asm | 30 ++
 libswscale/x86/scale.asm  | 45 -
 libswscale/x86/swscale.c  | 83 +--
 libswscale/x86/swscale_template.c | 30 --
 5 files changed, 38 insertions(+), 232 deletions(-)

diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm
index fcdfe2fcd8..6de6733faa 100644
--- a/libswscale/x86/input.asm
+++ b/libswscale/x86/input.asm
@@ -133,23 +133,18 @@ SECTION .text
 ; %2 = rgb or bgr
 %macro RGB24_TO_Y_FN 2-3
 cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
-%if mmsize == 8
-mova   m5, [%2_Ycoeff_12x4]
-mova   m6, [%2_Ycoeff_3x56]
-%define coeff1 m5
-%define coeff2 m6
-%elif ARCH_X86_64
+%if ARCH_X86_64
 mova   m8, [%2_Ycoeff_12x4]
 mova   m9, [%2_Ycoeff_3x56]
 %define coeff1 m8
 %define coeff2 m9
-%else ; x86-32 && mmsize == 16
+%else ; x86-32
 %define coeff1 [%2_Ycoeff_12x4]
 %define coeff2 [%2_Ycoeff_3x56]
-%endif ; x86-32/64 && mmsize == 8/16
-%if (ARCH_X86_64 || mmsize == 8) && %0 == 3
+%endif ; x86-32/64
+%if ARCH_X86_64 && %0 == 3
 jmp mangle(private_prefix %+ _ %+ %3 %+ 24ToY %+ SUFFIX).body
-%else ; (ARCH_X86_64 && %0 == 3) || mmsize == 8
+%else ; ARCH_X86_64 && %0 == 3
 .body:
 %if cpuflag(ssse3)
 mova   m7, [shuf_rgb_12x4]
@@ -184,7 +179,6 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
 movd   m1, [srcq+2]   ; (byte) { R0, B1, G1, R1 }
 movd   m2, [srcq+6]   ; (byte) { B2, G2, R2, B3 }
 movd   m3, [srcq+8]   ; (byte) { R2, B3, G3, R3 }
-%if mmsize == 16 ; i.e. sse2
 punpckldq  m0, m2 ; (byte) { B0, G0, R0, B1, B2, G2, 
R2, B3 }
 punpckldq  m1, m3 ; (byte) { R0, B1, G1, R1, R2, B3, 
G3, R3 }
 movd   m2, [srcq+12]  ; (byte) { B4, G4, R4, B5 }
@@ -193,7 +187,6 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
 movd   m6, [srcq+20]  ; (byte) { R6, B7, G7, R7 }
 punpckldq  m2, m5 ; (byte) { B4, G4, R4, B5, B6, G6, 
R6, B7 }
 punpckldq  m3, m6 ; (byte) { R4, B5, G5, R5, R6, B7, 
G7, R7 }
-%endif ; mmsize == 16
 punpcklbw  m0, m7 ; (word) { B0, G0, R0, B1, B2, G2, 
R2, B3 }
 punpcklbw  m1, m7 ; (word) { R0, B1, G1, R1, R2, B3, 
G3, R3 }
 punpcklbw  m2, m7 ; (word) { B4, G4, R4, B5, B6, G6, 
R6, B7 }
@@ -215,7 +208,7 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
 addwq, mmsize
 jl .loop
 REP_RET
-%endif ; (ARCH_X86_64 && %0 == 3) || mmsize == 8
+%endif ; ARCH_X86_64 && %0 == 3
 %endmacro
 
 ; %1 = nr. of XMM registers
@@ -275,12 +268,10 @@ cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, 
w, table
 movd   m1, [srcq+2]   ; (byte) { R0, B1, G1, R1 }
 movd   m4, [srcq+6]   ; (byte) { B2, G2, R2, B3 }
 movd   m5, [srcq+8]   ; (byte) { R2, B3, G3, R3 }
-%if mmsize == 16
 punpckldq  m0, m4 ; (byte) { B0, G0, R0, B1, B2, G2, 
R2, B3 }
 punpckldq  m1, m5 ; (byte) { R0, B1, G1, R1, R2, B3, 
G3, R3 }
 movd   m4, [srcq+12]  ; (byte) { B4, G4, R4, B5 }
 movd   m5, [srcq+14]  ; (byte) { R4, B5, G5, R5 }
-%endif ; mmsize == 16
 punpcklbw  m0, m7 ; (word) { B0, G0, R0, B1, B2, G2

[FFmpeg-cvslog] swscale/x86/swscale: Simplify macro

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Sat Jun 11 18:50:44 2022 +0200| [81d347203166a37b605920873ca2b8f19473ff3f] | 
committer: Andreas Rheinhardt

swscale/x86/swscale: Simplify macro

This is possible now that it is no longer used by MMX.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=81d347203166a37b605920873ca2b8f19473ff3f
---

 libswscale/x86/swscale.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 97bbc4f2d0..628f12137c 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -507,12 +507,12 @@ switch(c->dstBpc){ \
 case 9:  if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_  ## opt; 
break; \
 case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = 
ff_yuv2planeX_8_  ## opt; break; \
 }
-#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
+#define ASSIGN_VSCALE_FUNC(vscalefn, opt) \
 switch(c->dstBpc){ \
-case 16: if (!isBE(c->dstFormat))vscalefn = ff_yuv2plane1_16_ 
## opt1; break; \
-case 10: if (!isBE(c->dstFormat) && !isSemiPlanarYUV(c->dstFormat) && 
opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
-case 9:  if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_  
## opt2;  break; \
-case 8:  vscalefn = ff_yuv2plane1_8_  
## opt1;  break; \
+case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt; 
break; \
+case 10: if (!isBE(c->dstFormat) && !isSemiPlanarYUV(c->dstFormat)) 
vscalefn = ff_yuv2plane1_10_ ## opt; break; \
+case 9:  if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_9_  ## opt;  
break; \
+case 8:   vscalefn = ff_yuv2plane1_8_  ## opt;  
break; \
 default: av_assert0(c->dstBpc>8); \
 }
 #define case_rgb(x, X, opt) \
@@ -534,7 +534,7 @@ switch(c->dstBpc){ \
 ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
 ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
 HAVE_ALIGNED_STACK || ARCH_X86_64);
-ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
+ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2);
 
 switch (c->srcFormat) {
 case AV_PIX_FMT_YA8:
@@ -590,7 +590,7 @@ switch(c->dstBpc){ \
 if (EXTERNAL_AVX(cpu_flags)) {
 ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
 HAVE_ALIGNED_STACK || ARCH_X86_64);
-ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
+ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx);
 
 switch (c->srcFormat) {
 case AV_PIX_FMT_YUYV422:

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avfilter/x86/vf_eq_init: Remove obsolete MMXEXT function

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Sat Jun 11 01:37:50 2022 +0200| [77b2a422a09d1d801bebc3614f685fec0812963e] | 
committer: Andreas Rheinhardt

avfilter/x86/vf_eq_init: Remove obsolete MMXEXT function

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from process_mmxext are truly ancient 32bit x86s
it is removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=77b2a422a09d1d801bebc3614f685fec0812963e
---

 libavfilter/x86/vf_eq.asm| 12 ++--
 libavfilter/x86/vf_eq_init.c | 20 
 2 files changed, 2 insertions(+), 30 deletions(-)

diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm
index a30a287029..5118ffcba9 100644
--- a/libavfilter/x86/vf_eq.asm
+++ b/libavfilter/x86/vf_eq.asm
@@ -24,7 +24,7 @@
 
 SECTION .text
 
-%macro PROCESS_ONE_LINE 1
+INIT_XMM sse2
 cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w
 movd m3, contrastd
 movd m4, brightnessd
@@ -39,7 +39,7 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, 
brightness, w
 pxor m1, m1
 mov scalard, wd
 and scalard, mmsize-1
-sar wd, %1
+sar wd, 4
 cmp wd, 1
 jl .loop1
 
@@ -80,11 +80,3 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, 
brightness, w
 
 .end:
 RET
-
-%endmacro
-
-INIT_MMX mmxext
-PROCESS_ONE_LINE 3
-
-INIT_XMM sse2
-PROCESS_ONE_LINE 4
diff --git a/libavfilter/x86/vf_eq_init.c b/libavfilter/x86/vf_eq_init.c
index 113056e76b..a1719672df 100644
--- a/libavfilter/x86/vf_eq_init.c
+++ b/libavfilter/x86/vf_eq_init.c
@@ -25,27 +25,10 @@
 #include "libavutil/x86/asm.h"
 #include "libavfilter/vf_eq.h"
 
-extern void ff_process_one_line_mmxext(const uint8_t *src, uint8_t *dst, short 
contrast,
-   short brightness, int w);
 extern void ff_process_one_line_sse2(const uint8_t *src, uint8_t *dst, short 
contrast,
  short brightness, int w);
 
 #if HAVE_X86ASM
-static void process_mmxext(EQParameters *param, uint8_t *dst, int dst_stride,
-   const uint8_t *src, int src_stride, int w, int h)
-{
-short contrast = (short) (param->contrast * 256 * 16);
-short brightness = ((short) (100.0 * param->brightness + 100.0) * 511)
-   / 200 - 128 - contrast / 32;
-
-while (h--) {
-ff_process_one_line_mmxext(src, dst, contrast, brightness, w);
-src += src_stride;
-dst += dst_stride;
-}
-emms_c();
-}
-
 static void process_sse2(EQParameters *param, uint8_t *dst, int dst_stride,
  const uint8_t *src, int src_stride, int w, int h)
 {
@@ -65,9 +48,6 @@ av_cold void ff_eq_init_x86(EQContext *eq)
 {
 #if HAVE_X86ASM
 int cpu_flags = av_get_cpu_flags();
-if (EXTERNAL_MMXEXT(cpu_flags)) {
-eq->process = process_mmxext;
-}
 if (EXTERNAL_SSE2(cpu_flags)) {
 eq->process = process_sse2;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avutil/x86/pixelutils: Remove obsolete MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Thu Jun  9 17:50:53 2022 +0200| [ea043cc53ed3506775ec6239ed5f8a20718b1098] | 
committer: Andreas Rheinhardt

avutil/x86/pixelutils: Remove obsolete MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT, SSE and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2). So given that the only systems which benefit
from the 8x8 MMX (overridden by MMXEXT) or the 16x16 MMXEXT
(overridden by SSE2) are truly ancient 32bit x86s they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ea043cc53ed3506775ec6239ed5f8a20718b1098
---

 libavutil/x86/pixelutils.asm| 58 -
 libavutil/x86/pixelutils_init.c |  9 ---
 2 files changed, 67 deletions(-)

diff --git a/libavutil/x86/pixelutils.asm b/libavutil/x86/pixelutils.asm
index 8b45ead78b..fbe9b45971 100644
--- a/libavutil/x86/pixelutils.asm
+++ b/libavutil/x86/pixelutils.asm
@@ -25,44 +25,6 @@
 
 SECTION .text
 
-;---
-; int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1,
-;   const uint8_t *src2, ptrdiff_t stride2);
-;---
-INIT_MMX mmx
-cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2
-pxorm7, m7
-pxorm6, m6
-%rep 4
-movam0, [src1q]
-movam2, [src1q + stride1q]
-movam1, [src2q]
-movam3, [src2q + stride2q]
-psubusb m4, m0, m1
-psubusb m5, m2, m3
-psubusb m1, m0
-psubusb m3, m2
-por m1, m4
-por m3, m5
-punpcklbw   m0, m1, m7
-punpcklbw   m2, m3, m7
-punpckhbw   m1, m7
-punpckhbw   m3, m7
-paddw   m0, m1
-paddw   m2, m3
-paddw   m0, m2
-paddw   m6, m0
-lea src1q, [src1q + 2*stride1q]
-lea src2q, [src2q + 2*stride2q]
-%endrep
-psrlq   m0, m6, 32
-paddw   m6, m0
-psrlq   m0, m6, 16
-paddw   m6, m0
-movdeax, m6
-movzx   eax, ax
-RET
-
 
;---
 ; int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1,
 ;  const uint8_t *src2, ptrdiff_t stride2);
@@ -83,26 +45,6 @@ cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, 
stride2
 movdeax, m2
 RET
 
-;---
-; int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1,
-;const uint8_t *src2, ptrdiff_t stride2);
-;---
-INIT_MMX mmxext
-cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2
-pxorm2, m2
-%rep 16
-movam0, [src1q]
-movam1, [src1q + 8]
-psadbw  m0, [src2q]
-psadbw  m1, [src2q + 8]
-paddw   m2, m0
-paddw   m2, m1
-add src1q, stride1q
-add src2q, stride2q
-%endrep
-movdeax, m2
-RET
-
 
;---
 ; int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
 ;  const uint8_t *src2, ptrdiff_t stride2);
diff --git a/libavutil/x86/pixelutils_init.c b/libavutil/x86/pixelutils_init.c
index 184a3a4a9f..c3c0662414 100644
--- a/libavutil/x86/pixelutils_init.c
+++ b/libavutil/x86/pixelutils_init.c
@@ -21,13 +21,9 @@
 #include "pixelutils.h"
 #include "cpu.h"
 
-int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1,
-  const uint8_t *src2, ptrdiff_t stride2);
 int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1,
  const uint8_t *src2, ptrdiff_t stride2);
 
-int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1,
-   const uint8_t *src2, ptrdiff_t stride2);
 int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
  const uint8_t *src2, ptrdiff_t stride2);
 int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1,
@@ -53,10 +49,6 @@ void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, 
int aligned)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_MMX(cpu_flags)) {
-sad[2] = ff_pixelutils_sad_8x8_mmx;
-}
-
 // The best way to use SSE2 would be to do 2 SADs in parallel,
 // but we'd have to modify the pixelutils API to return SIMD functions.
 
@@ -65,7 +57,6 @@ void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, 
in

[FFmpeg-cvslog] avcodec/x86/fft: Remove obsolete 3dnow functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Sat Jun 11 02:45:08 2022 +0200| [ec735579814b6b73e17da601ff011a918c49e40f] | 
committer: Andreas Rheinhardt

avcodec/x86/fft: Remove obsolete 3dnow functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT, SSE and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2). So given that the only systems which benefit
from the 3dnow implementations are truly ancient 32bit AMD x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ec735579814b6b73e17da601ff011a918c49e40f
---

 libavcodec/x86/fft.asm| 259 ++
 libavcodec/x86/fft.h  |   6 --
 libavcodec/x86/fft_init.c |  14 ---
 3 files changed, 6 insertions(+), 273 deletions(-)

diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index a671e8f48e..a44596e565 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm
@@ -1,5 +1,5 @@
 ;**
-;* FFT transform with SSE/3DNow optimizations
+;* FFT transform with SSE/AVX optimizations
 ;* Copyright (c) 2008 Loren Merritt
 ;* Copyright (c) 2011 Vitor Sessak
 ;*
@@ -92,29 +92,6 @@ cextern cos_ %+ i
 
 SECTION .text
 
-%macro T2_3DNOW 4 ; z0, z1, mem0, mem1
-mova %1, %3
-mova %2, %1
-pfadd%1, %4
-pfsub%2, %4
-%endmacro
-
-%macro T4_3DNOW 6 ; z0, z1, z2, z3, tmp0, tmp1
-mova %5, %3
-pfsub%3, %4
-pfadd%5, %4 ; {t6,t5}
-pxor %3, [ps_m1p1] ; {t8,t7}
-mova %6, %1
-movd [r0+12], %3
-punpckhdq %3, [r0+8]
-pfadd%1, %5 ; {r0,i0}
-pfsub%6, %5 ; {r2,i2}
-mova %4, %2
-pfadd%2, %3 ; {r1,i1}
-pfsub%4, %3 ; {r3,i3}
-SWAP %3, %6
-%endmacro
-
 ;  in: %1 = {r0,i0,r2,i2,r4,i4,r6,i6}
 ;  %2 = {r1,i1,r3,i3,r5,i5,r7,i7}
 ;  %3, %4, %5 tmp
@@ -199,7 +176,7 @@ SECTION .text
 vextractf128  %4 %+ H(%5), %3, 0
 vextractf128   %4(%5 + 1), %2, 1
 vextractf128  %4 %+ H(%5 + 1), %3, 1
-%elif cpuflag(sse) || cpuflag(3dnow)
+%elif cpuflag(sse)
 mova %3, %2
 unpcklps %2, %1
 unpckhps %3, %1
@@ -310,12 +287,6 @@ IF%1 mova  Z(1), m5
 %endif
 %endmacro
 
-%macro PUNPCK 3
-mova  %3, %1
-punpckldq %1, %2
-punpckhdq %3, %2
-%endmacro
-
 %define Z(x) [r0+mmsize*x]
 %define Z2(x) [r0+mmsize*x]
 %define ZH(x) [r0+mmsize*x+mmsize/2]
@@ -462,68 +433,6 @@ fft16_sse:
 ret
 
 
-%macro FFT48_3DNOW 0
-align 16
-fft4 %+ SUFFIX:
-T2_3DNOW m0, m1, Z(0), Z(1)
-mova m2, Z(2)
-mova m3, Z(3)
-T4_3DNOW m0, m1, m2, m3, m4, m5
-PUNPCK   m0, m1, m4
-PUNPCK   m2, m3, m5
-mova   Z(0), m0
-mova   Z(1), m4
-mova   Z(2), m2
-mova   Z(3), m5
-ret
-
-align 16
-fft8 %+ SUFFIX:
-T2_3DNOW m0, m1, Z(0), Z(1)
-mova m2, Z(2)
-mova m3, Z(3)
-T4_3DNOW m0, m1, m2, m3, m4, m5
-mova   Z(0), m0
-mova   Z(2), m2
-T2_3DNOW m4, m5,  Z(4),  Z(5)
-T2_3DNOW m6, m7, Z2(6), Z2(7)
-PSWAPD   m0, m5
-PSWAPD   m2, m7
-pxor m0, [ps_m1p1]
-pxor m2, [ps_m1p1]
-pfsubm5, m0
-pfaddm7, m2
-pfmulm5, [ps_root2]
-pfmulm7, [ps_root2]
-T4_3DNOW m1, m3, m5, m7, m0, m2
-mova   Z(5), m5
-mova  Z2(7), m7
-mova m0, Z(0)
-mova m2, Z(2)
-T4_3DNOW m0, m2, m4, m6, m5, m7
-PUNPCK   m0, m1, m5
-PUNPCK   m2, m3, m7
-mova   Z(0), m0
-mova   Z(1), m5
-mova   Z(2), m2
-mova   Z(3), m7
-PUNPCK   m4,  Z(5), m5
-PUNPCK   m6, Z2(7), m7
-mova   Z(4), m4
-mova   Z(5), m5
-mova  Z2(6), m6
-mova  Z2(7), m7
-ret
-%endmacro
-
-%if ARCH_X86_32
-INIT_MMX 3dnowext
-FFT48_3DNOW
-
-INIT_MMX 3dnow
-FFT48_3DNOW
-%endif
-
 %define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)]
 %define Z2(x) [zcq + o3q + mmsize*(x&1)]
 %define ZH(x) [zcq + o1q*(x&6) + mmsize*(x&1) + mmsize/2]
@@ -575,7 +484,7 @@ INIT_XMM sse
 DECL_PASS pass_sse, PASS_BIG 1
 DECL_PASS pass_interleave_sse, PASS_BIG 0
 
-%macro FFT_CALC_FUNC 0
+INIT_XMM sse
 cglobal fft_calc, 2,5,8
 mov r3d, [r0 + FFTContext.nbits]
 PUSHr1
@@ -592,36 +501,16 @@ cglobal fft_calc, 2,5,8
 shl r2, cl
 sub r4, r2
 .loop:
-%if mmsize == 8
-PSWAPD  m0, [r4 + r2 + 4]
-mova [r4 + r2 + 4], m0
-%else
 movaps   xmm0, [r4 + r2]
 movaps   xmm1, xmm0
 unpcklps xmm0, [r4 + r2 + 16]
 unpckhps xmm1, [r4 + r2 + 16]
 movaps   [r4 + r2],  xmm0
 movaps   [r4 + r2 + 16], xmm1
-%endif
 add  r2, mmsize*2
 jl   .loop
 .end:
-%if cpuflag(3dnow)
-femms
-RET
-%else
 REP_RET
-%endif
-%endmacro
-
-%if ARCH_X86_32
-INIT_MMX 3dnow
-FFT_CALC_FUNC
-INIT_MMX 3dnowext
-FFT_CALC_FUNC
-%endif
-INIT_XMM sse
-FFT_CALC_FUNC
 
 cglobal fft_permute, 2,7,1
 mov r4,  [r0 + FFTContext.revtab]
@@ -656,7 +545,7 @@ cglobal fft_

[FFmpeg-cvslog] avcodec/x86/vorbisdsp: Remove obsolete 3dnow functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Sat Jun 11 03:31:25 2022 +0200| [f76477d4d78b01ae0170d3e7d8ef77ac3105a80c] | 
committer: Andreas Rheinhardt

avcodec/x86/vorbisdsp: Remove obsolete 3dnow functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT, SSE and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2). So given that the only systems which benefit
from the 3dnow implementations are truly ancient 32bit AMD x86s
they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f76477d4d78b01ae0170d3e7d8ef77ac3105a80c
---

 libavcodec/x86/vorbisdsp.asm| 29 -
 libavcodec/x86/vorbisdsp_init.c |  6 --
 2 files changed, 35 deletions(-)

diff --git a/libavcodec/x86/vorbisdsp.asm b/libavcodec/x86/vorbisdsp.asm
index d952296716..9afe2eb352 100644
--- a/libavcodec/x86/vorbisdsp.asm
+++ b/libavcodec/x86/vorbisdsp.asm
@@ -27,35 +27,6 @@ pdw_8000: times 4 dd 0x8000
 
 SECTION .text
 
-%if ARCH_X86_32
-INIT_MMX 3dnow
-cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
-pxor m7, m7
-leamagq, [magq+block_sizeq*4]
-leaangq, [angq+block_sizeq*4]
-neg block_sizeq
-.loop:
-mova m0, [magq+block_sizeq*4]
-mova m1, [angq+block_sizeq*4]
-mova m2, m0
-mova m3, m1
-pfcmpge  m2, m7 ; m <= 0.0
-pfcmpge  m3, m7 ; a <= 0.0
-pslldm2, 31 ; keep only the sign bit
-pxor m1, m2
-mova m4, m3
-pand m3, m1
-pandnm4, m1
-pfaddm3, m0 ; a = m + ((a < 0) & (a ^ sign(m)))
-pfsubm0, m4 ; m = m + ((a > 0) & (a ^ sign(m)))
-mova   [angq+block_sizeq*4], m3
-mova   [magq+block_sizeq*4], m0
-add block_sizeq, 2
-jl .loop
-femms
-RET
-%endif
-
 INIT_XMM sse
 cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
 mova m5, [pdw_8000]
diff --git a/libavcodec/x86/vorbisdsp_init.c b/libavcodec/x86/vorbisdsp_init.c
index bc1cc43a18..da9f9e685e 100644
--- a/libavcodec/x86/vorbisdsp_init.c
+++ b/libavcodec/x86/vorbisdsp_init.c
@@ -24,8 +24,6 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/vorbisdsp.h"
 
-void ff_vorbis_inverse_coupling_3dnow(float *mag, float *ang,
-  intptr_t blocksize);
 void ff_vorbis_inverse_coupling_sse(float *mag, float *ang,
 intptr_t blocksize);
 
@@ -33,10 +31,6 @@ av_cold void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
 {
 int cpu_flags = av_get_cpu_flags();
 
-#if ARCH_X86_32
-if (EXTERNAL_AMD3DNOW(cpu_flags))
-dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_3dnow;
-#endif /* ARCH_X86_32 */
 if (EXTERNAL_SSE(cpu_flags))
 dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_sse;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avutil/x86/float_dsp: Remove obsolete 3dnowext function

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Sun Jun 12 00:40:09 2022 +0200| [2718a3be1f8867fd4f6cb3f452d6917838b1ed88] | 
committer: Andreas Rheinhardt

avutil/x86/float_dsp: Remove obsolete 3dnowext function

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT, SSE and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2). So given that the only systems which benefit
from ff_vector_fmul_window_3dnowext are truly ancient 32-bit
AMD x86s it is removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2718a3be1f8867fd4f6cb3f452d6917838b1ed88
---

 libavutil/x86/float_dsp.asm| 25 +
 libavutil/x86/float_dsp_init.c |  5 -
 2 files changed, 1 insertion(+), 29 deletions(-)

diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index b773e61a64..cca4d019c7 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -294,7 +294,7 @@ VECTOR_DMUL_SCALAR
 ; vector_fmul_window(float *dst, const float *src0,
 ;const float *src1, const float *win, int len);
 ;-
-%macro VECTOR_FMUL_WINDOW 0
+INIT_XMM sse
 cglobal vector_fmul_window, 5, 6, 6, dst, src0, src1, win, len, len1
 shl lend, 2
 lealen1q, [lenq - mmsize]
@@ -305,7 +305,6 @@ cglobal vector_fmul_window, 5, 6, 6, dst, src0, src1, win, 
len, len1
 .loop:
 mova  m0, [winq  + lenq]
 mova  m4, [src0q + lenq]
-%if cpuflag(sse)
 mova  m1, [winq  + len1q]
 mova  m5, [src1q + len1q]
 shufpsm1, m1, 0x1b
@@ -319,34 +318,12 @@ cglobal vector_fmul_window, 5, 6, 6, dst, src0, src1, 
win, len, len1
 addps m2, m3
 subps m1, m0
 shufpsm2, m2, 0x1b
-%else
-pswapdm1, [winq  + len1q]
-pswapdm5, [src1q + len1q]
-mova  m2, m0
-mova  m3, m1
-pfmul m2, m4
-pfmul m3, m5
-pfmul m1, m4
-pfmul m0, m5
-pfadd m2, m3
-pfsub m1, m0
-pswapdm2, m2
-%endif
 mova  [dstq + lenq], m1
 mova  [dstq + len1q], m2
 sub   len1q, mmsize
 add   lenq,  mmsize
 jl .loop
-%if mmsize == 8
-femms
-%endif
 REP_RET
-%endmacro
-
-INIT_MMX 3dnowext
-VECTOR_FMUL_WINDOW
-INIT_XMM sse
-VECTOR_FMUL_WINDOW
 
 ;-
 ; vector_fmul_add(float *dst, const float *src0, const float *src1,
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index 8826e4e2c9..ad17bc2044 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -56,8 +56,6 @@ void ff_vector_dmul_scalar_sse2(double *dst, const double 
*src,
 void ff_vector_dmul_scalar_avx(double *dst, const double *src,
double mul, int len);
 
-void ff_vector_fmul_window_3dnowext(float *dst, const float *src0,
-const float *src1, const float *win, int 
len);
 void ff_vector_fmul_window_sse(float *dst, const float *src0,
const float *src1, const float *win, int len);
 
@@ -83,9 +81,6 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) {
-fdsp->vector_fmul_window = ff_vector_fmul_window_3dnowext;
-}
 if (EXTERNAL_SSE(cpu_flags)) {
 fdsp->vector_fmul = ff_vector_fmul_sse;
 fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/videodsp: Remove obsolete MMX, 3dnow, SSE functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Sun Jun 12 02:13:39 2022 +0200| [19abc4c0a9ee5e45b630d7ca9815e8d0723a24e2] | 
committer: Andreas Rheinhardt

avcodec/x86/videodsp: Remove obsolete MMX, 3dnow, SSE functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT, SSE and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2). So given that the only systems which benefit
from these functions are truly ancient 32-bit x86s they are removed.

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=19abc4c0a9ee5e45b630d7ca9815e8d0723a24e2
---

 libavcodec/x86/videodsp.asm| 38 ++
 libavcodec/x86/videodsp_init.c | 71 --
 2 files changed, 3 insertions(+), 106 deletions(-)

diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm
index e237860700..b19a8300c5 100644
--- a/libavcodec/x86/videodsp.asm
+++ b/libavcodec/x86/videodsp.asm
@@ -45,7 +45,6 @@ SECTION .text
 jnz .%1_y_loop
 %endmacro
 
-%macro vvar_fn 0
 ; .. <- zero
 ; ||<- top is copied from first line in body of source
 ; || <- start_y
@@ -53,6 +52,7 @@ SECTION .text
 ; || <- end_y
 ; ||<- bottom is copied from last line in body of source
 ; '' <- bh
+INIT_XMM sse
 %if ARCH_X86_64
 cglobal emu_edge_vvar, 7, 8, 1, dst, dst_stride, src, src_stride, \
 start_y, end_y, bh, w
@@ -81,15 +81,6 @@ cglobal emu_edge_vvar, 1, 6, 1, dst, src, start_y, end_y, 
bh, w
 V_COPY_ROW   bottom, bhq;   v_copy_row(bottom, bh)
 .end:   ; }
 RET
-%endmacro
-
-%if ARCH_X86_32
-INIT_MMX mmx
-vvar_fn
-%endif
-
-INIT_XMM sse
-vvar_fn
 
 %macro hvar_fn 0
 cglobal emu_edge_hvar, 5, 6, 1, dst, dst_stride, start_x, n_words, h, w
@@ -105,11 +96,7 @@ cglobal emu_edge_hvar, 5, 6, 1, dst, dst_stride, start_x, 
n_words, h, w
 imul wd, 0x01010101 ;   w *= 0x01010101
 movd m0, wd
 mov  wq, n_wordsq   ;   initialize w
-%if cpuflag(sse2)
 pshufd   m0, m0, q  ;   splat
-%else ; mmx
-punpckldqm0, m0 ;   splat
-%endif ; mmx/sse
 %endif ; avx2
 .x_loop:;   do {
 movu[dstq+wq*2], m0 ; write($reg, $mmsize)
@@ -123,11 +110,6 @@ cglobal emu_edge_hvar, 5, 6, 1, dst, dst_stride, start_x, 
n_words, h, w
 RET
 %endmacro
 
-%if ARCH_X86_32
-INIT_MMX mmx
-hvar_fn
-%endif
-
 INIT_XMM sse2
 hvar_fn
 
@@ -338,9 +320,6 @@ cglobal emu_edge_vfix %+ %%n, 1, 5, 1, dst, src, start_y, 
end_y, bh
 
 INIT_MMX mmx
 VERTICAL_EXTEND 1, 15
-%if ARCH_X86_32
-VERTICAL_EXTEND 16, 22
-%endif
 
 INIT_XMM sse
 VERTICAL_EXTEND 16, 22
@@ -438,9 +417,6 @@ cglobal emu_edge_hfix %+ %%n, 4, 5, 1, dst, dst_stride, 
start_x, bh, val
 
 INIT_MMX mmx
 H_EXTEND 2, 14
-%if ARCH_X86_32
-H_EXTEND 16, 22
-%endif
 
 INIT_XMM sse2
 H_EXTEND 16, 22
@@ -450,19 +426,11 @@ INIT_XMM avx2
 H_EXTEND 8, 22
 %endif
 
-%macro PREFETCH_FN 1
+INIT_MMX mmxext
 cglobal prefetch, 3, 3, 0, buf, stride, h
 .loop:
-%1  [bufq]
+prefetcht0 [bufq]
 add  bufq, strideq
 dechd
 jg .loop
 REP_RET
-%endmacro
-
-INIT_MMX mmxext
-PREFETCH_FN prefetcht0
-%if ARCH_X86_32
-INIT_MMX 3dnow
-PREFETCH_FN prefetch
-%endif
diff --git a/libavcodec/x86/videodsp_init.c b/libavcodec/x86/videodsp_init.c
index 961424aa13..a14c9635fb 100644
--- a/libavcodec/x86/videodsp_init.c
+++ b/libavcodec/x86/videodsp_init.c
@@ -52,26 +52,6 @@ extern emu_edge_vfix_func ff_emu_edge_vfix12_mmx;
 extern emu_edge_vfix_func ff_emu_edge_vfix13_mmx;
 extern emu_edge_vfix_func ff_emu_edge_vfix14_mmx;
 extern emu_edge_vfix_func ff_emu_edge_vfix15_mmx;
-extern emu_edge_vfix_func ff_emu_edge_vfix16_mmx;
-extern emu_edge_vfix_func ff_emu_edge_vfix17_mmx;
-extern emu_edge_vfix_func ff_emu_edge_vfix18_mmx;
-extern emu_edge_vfix_func ff_emu_edge_vfix19_mmx;
-extern emu_edge_vfix_func ff_emu_edge_vfix20_mmx;
-extern emu_edge_vfix_func ff_emu_edge_vfix21_mmx;
-extern emu_edge_vfix_func ff_emu_edge_vfix22_mmx;
-#if ARCH_X86_32
-static emu_edge_vfix_func * const vfixtbl_mmx[22] = {
-&ff_emu_edge_vfix1_mmx,  &ff_emu_edge_vfix2_mmx,  &ff_emu_edge_vfix3_mmx,
-&ff_emu_edge_vfix4_mmx,  &ff_emu_edge_vfix5_mmx,  &ff_emu_edge_vfix6_mmx,
-&ff_emu_edge_vfix7_mmx,  &ff_emu_edge_vfix8_mmx,  &ff_emu_edge_vfix9_mmx,
-&ff_emu_edge_vfix10_mmx, &ff_emu_edge_vfix11_mmx, &ff_emu_edge_vfix12_mmx,
-&ff_emu_edge_vfix13_mmx, &ff_emu_edge_vfix14_mmx, &ff_emu_edge_vfix15_mmx,
-&ff_emu_edge_vfix16_mmx, &ff_emu_edge_vfix17_mmx, &ff_emu_edge_vfix18_mmx,
-&ff_emu_edge_vfix19_mmx, &ff_emu_edge_vfix20_mmx, &ff_emu_edge_vfix21_mmx,
-&ff_emu_edge_vfix22_mmx
-};
-#endif
-extern emu_edge_vvar_func ff_emu_edge_vvar_mmx;
 extern emu_edge_vfix_func f

[FFmpeg-cvslog] avfilter/x86/vf_yadif: Remove obsolete MMXEXT functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Mon Jun 13 08:34:13 2022 +0200| [4d7128be9a31c7bf6cb79436711ded3cc9767fe8] | 
committer: Andreas Rheinhardt

avfilter/x86/vf_yadif: Remove obsolete MMXEXT functions

The only systems which benefit from these are truly ancient
32-bit x86s as all other systems use at least the SSE2 versions
(this includes all x64 cpus (which is why this code is restricted
to x86-32)).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4d7128be9a31c7bf6cb79436711ded3cc9767fe8
---

 libavfilter/x86/vf_yadif.asm|  8 
 libavfilter/x86/vf_yadif_init.c | 21 -
 libavfilter/x86/yadif-10.asm|  4 
 libavfilter/x86/yadif-16.asm|  4 
 4 files changed, 37 deletions(-)

diff --git a/libavfilter/x86/vf_yadif.asm b/libavfilter/x86/vf_yadif.asm
index a29620ce55..809cebdd3f 100644
--- a/libavfilter/x86/vf_yadif.asm
+++ b/libavfilter/x86/vf_yadif.asm
@@ -133,12 +133,8 @@ SECTION .text
 psubusb  m2, m3
 psubusb  m3, m4
 pmaxub   m2, m3
-%if mmsize == 16
 mova m3, m2
 psrldq   m3, 2
-%else
-pshufw   m3, m2, q0021
-%endif
 punpcklbwm2, m7
 punpcklbwm3, m7
 paddwm0, m2
@@ -237,7 +233,3 @@ INIT_XMM ssse3
 YADIF
 INIT_XMM sse2
 YADIF
-%if ARCH_X86_32
-INIT_MMX mmxext
-YADIF
-%endif
diff --git a/libavfilter/x86/vf_yadif_init.c b/libavfilter/x86/vf_yadif_init.c
index 66cbee8510..257c3f9199 100644
--- a/libavfilter/x86/vf_yadif_init.c
+++ b/libavfilter/x86/vf_yadif_init.c
@@ -23,9 +23,6 @@
 #include "libavutil/x86/cpu.h"
 #include "libavfilter/yadif.h"
 
-void ff_yadif_filter_line_mmxext(void *dst, void *prev, void *cur,
- void *next, int w, int prefs,
- int mrefs, int parity, int mode);
 void ff_yadif_filter_line_sse2(void *dst, void *prev, void *cur,
void *next, int w, int prefs,
int mrefs, int parity, int mode);
@@ -33,9 +30,6 @@ void ff_yadif_filter_line_ssse3(void *dst, void *prev, void 
*cur,
 void *next, int w, int prefs,
 int mrefs, int parity, int mode);
 
-void ff_yadif_filter_line_16bit_mmxext(void *dst, void *prev, void *cur,
-   void *next, int w, int prefs,
-   int mrefs, int parity, int mode);
 void ff_yadif_filter_line_16bit_sse2(void *dst, void *prev, void *cur,
  void *next, int w, int prefs,
  int mrefs, int parity, int mode);
@@ -46,9 +40,6 @@ void ff_yadif_filter_line_16bit_sse4(void *dst, void *prev, 
void *cur,
  void *next, int w, int prefs,
  int mrefs, int parity, int mode);
 
-void ff_yadif_filter_line_10bit_mmxext(void *dst, void *prev, void *cur,
-   void *next, int w, int prefs,
-   int mrefs, int parity, int mode);
 void ff_yadif_filter_line_10bit_sse2(void *dst, void *prev, void *cur,
  void *next, int w, int prefs,
  int mrefs, int parity, int mode);
@@ -63,10 +54,6 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
   : yadif->csp->comp[0].depth;
 
 if (bit_depth >= 15) {
-#if ARCH_X86_32
-if (EXTERNAL_MMXEXT(cpu_flags))
-yadif->filter_line = ff_yadif_filter_line_16bit_mmxext;
-#endif /* ARCH_X86_32 */
 if (EXTERNAL_SSE2(cpu_flags))
 yadif->filter_line = ff_yadif_filter_line_16bit_sse2;
 if (EXTERNAL_SSSE3(cpu_flags))
@@ -74,19 +61,11 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
 if (EXTERNAL_SSE4(cpu_flags))
 yadif->filter_line = ff_yadif_filter_line_16bit_sse4;
 } else if ( bit_depth >= 9 && bit_depth <= 14) {
-#if ARCH_X86_32
-if (EXTERNAL_MMXEXT(cpu_flags))
-yadif->filter_line = ff_yadif_filter_line_10bit_mmxext;
-#endif /* ARCH_X86_32 */
 if (EXTERNAL_SSE2(cpu_flags))
 yadif->filter_line = ff_yadif_filter_line_10bit_sse2;
 if (EXTERNAL_SSSE3(cpu_flags))
 yadif->filter_line = ff_yadif_filter_line_10bit_ssse3;
 } else {
-#if ARCH_X86_32
-if (EXTERNAL_MMXEXT(cpu_flags))
-yadif->filter_line = ff_yadif_filter_line_mmxext;
-#endif /* ARCH_X86_32 */
 if (EXTERNAL_SSE2(cpu_flags))
 yadif->filter_line = ff_yadif_filter_line_sse2;
 if (EXTERNAL_SSSE3(cpu_flags))
diff --git a/libavfilter/x86/yadif-10.asm b/libavfilter/x86/yadif-10.asm
index 8853e0d2c7..c6952db64b 100644
--- a/libavfilter/x86/yadif-10.asm
+++ b/libavfilter/x86/yadif-10.asm
@@ -249,7 +249,3 @@ INIT_XMM ssse3
 YADIF
 INIT_XMM sse2
 YADIF
-%if ARCH_X86_32
-INIT_MMX mmxext
-

[FFmpeg-cvslog] avfilter/x86/vf_idet: Remove obsolete MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Mon Jun 13 08:39:57 2022 +0200| [7c3c1d938f2e01bd607deb814706f67438e85b7a] | 
committer: Andreas Rheinhardt

avfilter/x86/vf_idet: Remove obsolete MMX(EXT) functions

The only systems which benefit from these are truly ancient
32-bit x86s as all other systems use at least the SSE2 versions
(this includes all x64 cpus (which is why this code is restricted
to x86-32)).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7c3c1d938f2e01bd607deb814706f67438e85b7a
---

 libavfilter/x86/vf_idet.asm| 58 --
 libavfilter/x86/vf_idet_init.c | 16 +---
 2 files changed, 1 insertion(+), 73 deletions(-)

diff --git a/libavfilter/x86/vf_idet.asm b/libavfilter/x86/vf_idet.asm
index 9596abd7e2..7bc8e7d2c4 100644
--- a/libavfilter/x86/vf_idet.asm
+++ b/libavfilter/x86/vf_idet.asm
@@ -25,60 +25,6 @@
 
 SECTION .text
 
-; Implementation that does 8-bytes at a time using single-word operations.
-%macro IDET_FILTER_LINE 1
-INIT_MMX %1
-cglobal idet_filter_line, 4, 5, 0, a, b, c, width, index
-xor   indexq, indexq
-%define   m_zero m2
-%define   m_sum  m5
-pxor  m_sum, m_sum
-pxor  m_zero, m_zero
-
-.loop:
-movu  m0, [aq + indexq*1]
-punpckhbw m1, m0, m_zero
-punpcklbw m0, m_zero
-
-movu  m3, [cq + indexq*1]
-punpckhbw m4, m3, m_zero
-punpcklbw m3, m_zero
-
-paddswm1, m4
-paddswm0, m3
-
-movu  m3, [bq + indexq*1]
-punpckhbw m4, m3, m_zero
-punpcklbw m3, m_zero
-
-paddw m4, m4
-paddw m3, m3
-psubswm1, m4
-psubswm0, m3
-
-ABS2  m1, m0, m4, m3
-
-paddw m0, m1
-punpckhwd m1, m0, m_zero
-punpcklwd m0, m_zero
-
-paddd m0, m1
-paddd m_sum, m0
-
-add   indexq, 0x8
-CMP   widthd, indexd
-jg.loop
-
-HADDD m_sum, m0
-movd  eax, m_sum
-RET
-%endmacro
-
-%if ARCH_X86_32
-IDET_FILTER_LINE mmxext
-IDET_FILTER_LINE mmx
-%endif
-
 ;**
 ; 16bit implementation that does 4/8-pixels at a time
 
@@ -128,10 +74,6 @@ cglobal idet_filter_line_16bit, 4, 5, 8, a, b, c, width, 
index
 
 INIT_XMM sse2
 IDET_FILTER_LINE_16BIT 8
-%if ARCH_X86_32
-INIT_MMX mmx
-IDET_FILTER_LINE_16BIT 4
-%endif
 
 ;**
 ; SSE2 8-bit implementation that does 16-bytes at a time:
diff --git a/libavfilter/x86/vf_idet_init.c b/libavfilter/x86/vf_idet_init.c
index d4d9bd0893..acb4e2a778 100644
--- a/libavfilter/x86/vf_idet_init.c
+++ b/libavfilter/x86/vf_idet_init.c
@@ -24,7 +24,7 @@
 
 #if HAVE_X86ASM
 
-/* declares main callable idet_filter_line_{mmx,mmxext,sse2}() */
+/* declares main callable idet_filter_line_sse2() */
 #define FUNC_MAIN_DECL(KIND, SPAN)\
 int ff_idet_filter_line_##KIND(const uint8_t *a, const uint8_t *b,\
const uint8_t *c, int w);  \
@@ -58,11 +58,6 @@ static int idet_filter_line_16bit_##KIND(const uint16_t *a, 
const uint16_t *b, \
 
 FUNC_MAIN_DECL(sse2, 16)
 FUNC_MAIN_DECL_16bit(sse2, 8)
-#if ARCH_X86_32
-FUNC_MAIN_DECL(mmx, 8)
-FUNC_MAIN_DECL(mmxext, 8)
-FUNC_MAIN_DECL_16bit(mmx, 4)
-#endif
 
 #endif
 av_cold void ff_idet_init_x86(IDETContext *idet, int for_16b)
@@ -70,15 +65,6 @@ av_cold void ff_idet_init_x86(IDETContext *idet, int for_16b)
 #if HAVE_X86ASM
 const int cpu_flags = av_get_cpu_flags();
 
-#if ARCH_X86_32
-if (EXTERNAL_MMX(cpu_flags)) {
-idet->filter_line = for_16b ? 
(ff_idet_filter_func)idet_filter_line_16bit_mmx : idet_filter_line_mmx;
-}
-if (EXTERNAL_MMXEXT(cpu_flags)) {
-idet->filter_line = for_16b ? 
(ff_idet_filter_func)idet_filter_line_16bit_mmx : idet_filter_line_mmxext;
-}
-#endif // ARCH_x86_32
-
 if (EXTERNAL_SSE2(cpu_flags)) {
 idet->filter_line = for_16b ? 
(ff_idet_filter_func)idet_filter_line_16bit_sse2 : idet_filter_line_sse2;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avfilter/x86/vf_bwdif: Remove obsolete MMXEXT functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Mon Jun 13 08:42:46 2022 +0200| [ed42a51930d9cca6dfed35c4af4b5b3a3f7f6a04] | 
committer: Andreas Rheinhardt

avfilter/x86/vf_bwdif: Remove obsolete MMXEXT functions

The only systems which benefit from these are truly ancient
32-bit x86s as all other systems use at least the SSE2 versions
(this includes all x64 cpus (which is why this code is restricted
to x86-32)).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ed42a51930d9cca6dfed35c4af4b5b3a3f7f6a04
---

 libavfilter/x86/vf_bwdif.asm|  4 
 libavfilter/x86/vf_bwdif_init.c | 16 
 2 files changed, 20 deletions(-)

diff --git a/libavfilter/x86/vf_bwdif.asm b/libavfilter/x86/vf_bwdif.asm
index 147b7c6ac6..0b453da53b 100644
--- a/libavfilter/x86/vf_bwdif.asm
+++ b/libavfilter/x86/vf_bwdif.asm
@@ -264,7 +264,3 @@ INIT_XMM ssse3
 BWDIF
 INIT_XMM sse2
 BWDIF
-%if ARCH_X86_32
-INIT_MMX mmxext
-BWDIF
-%endif
diff --git a/libavfilter/x86/vf_bwdif_init.c b/libavfilter/x86/vf_bwdif_init.c
index f632c4f340..e24e5cd9b1 100644
--- a/libavfilter/x86/vf_bwdif_init.c
+++ b/libavfilter/x86/vf_bwdif_init.c
@@ -24,10 +24,6 @@
 #include "libavutil/x86/cpu.h"
 #include "libavfilter/bwdif.h"
 
-void ff_bwdif_filter_line_mmxext(void *dst, void *prev, void *cur, void *next,
- int w, int prefs, int mrefs, int prefs2,
- int mrefs2, int prefs3, int mrefs3, int 
prefs4,
- int mrefs4, int parity, int clip_max);
 void ff_bwdif_filter_line_sse2(void *dst, void *prev, void *cur, void *next,
int w, int prefs, int mrefs, int prefs2,
int mrefs2, int prefs3, int mrefs3, int prefs4,
@@ -37,10 +33,6 @@ void ff_bwdif_filter_line_ssse3(void *dst, void *prev, void 
*cur, void *next,
 int mrefs2, int prefs3, int mrefs3, int prefs4,
 int mrefs4, int parity, int clip_max);
 
-void ff_bwdif_filter_line_12bit_mmxext(void *dst, void *prev, void *cur, void 
*next,
-   int w, int prefs, int mrefs, int prefs2,
-   int mrefs2, int prefs3, int mrefs3, int 
prefs4,
-   int mrefs4, int parity, int clip_max);
 void ff_bwdif_filter_line_12bit_sse2(void *dst, void *prev, void *cur, void 
*next,
  int w, int prefs, int mrefs, int prefs2,
  int mrefs2, int prefs3, int mrefs3, int 
prefs4,
@@ -57,19 +49,11 @@ av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif)
 int bit_depth = (!yadif->csp) ? 8 : yadif->csp->comp[0].depth;
 
 if (bit_depth <= 8) {
-#if ARCH_X86_32
-if (EXTERNAL_MMXEXT(cpu_flags))
-bwdif->filter_line = ff_bwdif_filter_line_mmxext;
-#endif /* ARCH_X86_32 */
 if (EXTERNAL_SSE2(cpu_flags))
 bwdif->filter_line = ff_bwdif_filter_line_sse2;
 if (EXTERNAL_SSSE3(cpu_flags))
 bwdif->filter_line = ff_bwdif_filter_line_ssse3;
 } else if (bit_depth <= 12) {
-#if ARCH_X86_32
-if (EXTERNAL_MMXEXT(cpu_flags))
-bwdif->filter_line = ff_bwdif_filter_line_12bit_mmxext;
-#endif /* ARCH_X86_32 */
 if (EXTERNAL_SSE2(cpu_flags))
 bwdif->filter_line = ff_bwdif_filter_line_12bit_sse2;
 if (EXTERNAL_SSSE3(cpu_flags))

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/dct32: Remove obsolete SSE function

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Tue Jun 14 20:26:16 2022 +0200| [54784ffac523c6fbc50762a91b3dcc481933b0b0] | 
committer: Andreas Rheinhardt

avcodec/x86/dct32: Remove obsolete SSE function

The only systems which benefit from ff_dct32_float_sse are truly
ancient 32-bit x86s as all other systems use at least the SSE2 versions
(this includes all x64 cpus (which is why this code is restricted
to x86-32)).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=54784ffac523c6fbc50762a91b3dcc481933b0b0
---

 libavcodec/x86/dct32.asm  | 12 +---
 libavcodec/x86/dct_init.c |  5 -
 2 files changed, 1 insertion(+), 16 deletions(-)

diff --git a/libavcodec/x86/dct32.asm b/libavcodec/x86/dct32.asm
index 21e2f21c97..37fba51543 100644
--- a/libavcodec/x86/dct32.asm
+++ b/libavcodec/x86/dct32.asm
@@ -387,7 +387,7 @@ INIT_XMM
 %endif
 
 
-; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
+; void ff_dct32_float(FFTSample *out, const FFTSample *in)
 %macro DCT32_FUNC 0
 cglobal dct32_float, 2, 3, 16, out, in, tmp
 ; pass 1
@@ -474,18 +474,8 @@ cglobal dct32_float, 2, 3, 16, out, in, tmp
 %endmacro
 
 %macro LOAD_INV 2
-%if cpuflag(sse2)
 pshufd  %1, %2, 0x1b
-%elif cpuflag(sse)
-movaps  %1, %2
-shufps  %1, %1, 0x1b
-%endif
 %endmacro
 
-%if ARCH_X86_32
-INIT_XMM sse
-DCT32_FUNC
-%endif
-
 INIT_XMM sse2
 DCT32_FUNC
diff --git a/libavcodec/x86/dct_init.c b/libavcodec/x86/dct_init.c
index c31ef92238..d0e4b34dd3 100644
--- a/libavcodec/x86/dct_init.c
+++ b/libavcodec/x86/dct_init.c
@@ -22,7 +22,6 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/dct.h"
 
-void ff_dct32_float_sse(FFTSample *out, const FFTSample *in);
 void ff_dct32_float_sse2(FFTSample *out, const FFTSample *in);
 void ff_dct32_float_avx(FFTSample *out, const FFTSample *in);
 
@@ -30,10 +29,6 @@ av_cold void ff_dct_init_x86(DCTContext *s)
 {
 int cpu_flags = av_get_cpu_flags();
 
-#if ARCH_X86_32
-if (EXTERNAL_SSE(cpu_flags))
-s->dct32 = ff_dct32_float_sse;
-#endif
 if (EXTERNAL_SSE2(cpu_flags))
 s->dct32 = ff_dct32_float_sse2;
 if (EXTERNAL_AVX_FAST(cpu_flags))

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/vp3dsp: Remove obsolete MMX functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Mon Jun 13 16:57:39 2022 +0200| [eefec0663406d7c2749a280f5244caaacb069c60] | 
committer: Andreas Rheinhardt

avcodec/x86/vp3dsp: Remove obsolete MMX functions

The only systems which benefit from these are truly ancient
32-bit x86s as all other systems use at least the SSE2 versions
(this includes all x64 cpus (which is why this code is restricted
to x86-32)).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=eefec0663406d7c2749a280f5244caaacb069c60
---

 libavcodec/x86/vp3dsp.asm| 62 
 libavcodec/x86/vp3dsp_init.c |  7 -
 2 files changed, 69 deletions(-)

diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index d88d5a1edf..005ecbc9a0 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -571,40 +571,25 @@ cglobal vp3_idct_put, 3, 4, 9
 mova  m1, [r2+mmsize*2+%%i]
 mova  m2, [r2+mmsize*4+%%i]
 mova  m3, [r2+mmsize*6+%%i]
-%if mmsize == 8
-packsswb  m0, [r2+mmsize*8+%%i]
-packsswb  m1, [r2+mmsize*10+%%i]
-packsswb  m2, [r2+mmsize*12+%%i]
-packsswb  m3, [r2+mmsize*14+%%i]
-%else
 packsswb  m0, [r2+mmsize*1+%%i]
 packsswb  m1, [r2+mmsize*3+%%i]
 packsswb  m2, [r2+mmsize*5+%%i]
 packsswb  m3, [r2+mmsize*7+%%i]
-%endif
 paddb m0, m4
 paddb m1, m4
 paddb m2, m4
 paddb m3, m4
 movq   [r0 ], m0
-%if mmsize == 8
-movq   [r0+r1  ], m1
-movq   [r0+r1*2], m2
-movq   [r0+r3  ], m3
-%else
 movhps [r0+r1  ], m0
 movq   [r0+r1*2], m1
 movhps [r0+r3  ], m1
-%endif
 %if %%i == 0
 lea   r0, [r0+r1*4]
 %endif
-%if mmsize == 16
 movq   [r0 ], m2
 movhps [r0+r1  ], m2
 movq   [r0+r1*2], m3
 movhps [r0+r3  ], m3
-%endif
 %assign %%i %%i+8
 %endrep
 
@@ -621,7 +606,6 @@ cglobal vp3_idct_add, 3, 4, 9
 
 lea   r3, [r1*3]
 pxor  m4, m4
-%if mmsize == 16
 %assign %%i 0
 %rep 2
 movq  m0, [r0]
@@ -647,47 +631,6 @@ cglobal vp3_idct_add, 3, 4, 9
 %endif
 %assign %%i %%i+64
 %endrep
-%else
-%assign %%i 0
-%rep 2
-movq  m0, [r0]
-movq  m1, [r0+r1]
-movq  m2, [r0+r1*2]
-movq  m3, [r0+r3]
-movq  m5, m0
-movq  m6, m1
-movq  m7, m2
-punpcklbw m0, m4
-punpcklbw m1, m4
-punpcklbw m2, m4
-punpckhbw m5, m4
-punpckhbw m6, m4
-punpckhbw m7, m4
-paddswm0, [r2+ 0+%%i]
-paddswm1, [r2+16+%%i]
-paddswm2, [r2+32+%%i]
-paddswm5, [r2+64+%%i]
-paddswm6, [r2+80+%%i]
-paddswm7, [r2+96+%%i]
-packuswb  m0, m5
-movq  m5, m3
-punpcklbw m3, m4
-punpckhbw m5, m4
-packuswb  m1, m6
-paddswm3, [r2+48+%%i]
-paddswm5, [r2+112+%%i]
-packuswb  m2, m7
-packuswb  m3, m5
-movq   [r0 ], m0
-movq   [r0+r1  ], m1
-movq   [r0+r1*2], m2
-movq   [r0+r3  ], m3
-%if %%i == 0
-lea   r0, [r0+r1*4]
-%endif
-%assign %%i %%i+8
-%endrep
-%endif
 %assign %%i 0
 %rep 128/mmsize
 mova[r2+%%i], m4
@@ -696,11 +639,6 @@ cglobal vp3_idct_add, 3, 4, 9
 RET
 %endmacro
 
-%if ARCH_X86_32
-INIT_MMX mmx
-vp3_idct_funcs
-%endif
-
 INIT_XMM sse2
 vp3_idct_funcs
 
diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c
index ba47e1c6cd..f54fa57b3e 100644
--- a/libavcodec/x86/vp3dsp_init.c
+++ b/libavcodec/x86/vp3dsp_init.c
@@ -26,9 +26,6 @@
 #include "libavcodec/avcodec.h"
 #include "libavcodec/vp3dsp.h"
 
-void ff_vp3_idct_put_mmx(uint8_t *dest, ptrdiff_t stride, int16_t *block);
-void ff_vp3_idct_add_mmx(uint8_t *dest, ptrdiff_t stride, int16_t *block);
-
 void ff_vp3_idct_put_sse2(uint8_t *dest, ptrdiff_t stride, int16_t *block);
 void ff_vp3_idct_add_sse2(uint8_t *dest, ptrdiff_t stride, int16_t *block);
 
@@ -49,10 +46,6 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
 
 if (EXTERNAL_MMX(cpu_flags)) {
 c->put_no_rnd_pixels_l2 = ff_put_vp_no_rnd_pixels8_l2_mmx;
-#if ARCH_X86_32
-c->idct_put  = ff_vp3_idct_put_mmx;
-c->idct_add  = ff_vp3_idct_add_mmx;
-#endif
 }
 
 if (EXTERNAL_MMXEXT(cpu_flags)) {

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/mpegaudiodsp: Remove obsolete SSE function

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Tue Jun 14 20:21:03 2022 +0200| [25e39f8c727190520e1274be9bf0b4b3302587f9] | 
committer: Andreas Rheinhardt

avcodec/x86/mpegaudiodsp: Remove obsolete SSE function

The only systems which benefit from imdct36_blocks_sse are truly
ancient 32-bit x86s as all other systems use at least the SSE2 versions
(this includes all x64 cpus (which is why this code is restricted
to x86-32)).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=25e39f8c727190520e1274be9bf0b4b3302587f9
---

 libavcodec/x86/imdct36.asm|  5 -
 libavcodec/x86/mpegaudiodsp.c | 11 ---
 2 files changed, 16 deletions(-)

diff --git a/libavcodec/x86/imdct36.asm b/libavcodec/x86/imdct36.asm
index b386ab95fc..888c6bf4d6 100644
--- a/libavcodec/x86/imdct36.asm
+++ b/libavcodec/x86/imdct36.asm
@@ -373,11 +373,6 @@ cglobal imdct36_float, 4,4,9, out, buf, in, win
 RET
 %endmacro
 
-%if ARCH_X86_32
-INIT_XMM sse
-DEFINE_IMDCT
-%endif
-
 INIT_XMM sse2
 DEFINE_IMDCT
 
diff --git a/libavcodec/x86/mpegaudiodsp.c b/libavcodec/x86/mpegaudiodsp.c
index dcea94a1f5..6586fe0726 100644
--- a/libavcodec/x86/mpegaudiodsp.c
+++ b/libavcodec/x86/mpegaudiodsp.c
@@ -34,9 +34,6 @@ static void imdct36_blocks_ ## CPU(float *out, float *buf, 
float *in, int count,
 void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win);
 
 #if HAVE_X86ASM
-#if ARCH_X86_32
-DECL(sse)
-#endif
 DECL(sse2)
 DECL(sse3)
 DECL(ssse3)
@@ -230,9 +227,6 @@ static void imdct36_blocks_ ## CPU1(float *out, float *buf, 
float *in,  \
 }
 
 #if HAVE_SSE
-#if ARCH_X86_32
-DECL_IMDCT_BLOCKS(sse,sse)
-#endif
 DECL_IMDCT_BLOCKS(sse2,sse)
 DECL_IMDCT_BLOCKS(sse3,sse)
 DECL_IMDCT_BLOCKS(ssse3,sse)
@@ -271,11 +265,6 @@ av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
 
 #if HAVE_X86ASM
 #if HAVE_SSE
-#if ARCH_X86_32
-if (EXTERNAL_SSE(cpu_flags)) {
-s->imdct36_blocks_float = imdct36_blocks_sse;
-}
-#endif
 if (EXTERNAL_SSE2(cpu_flags)) {
 s->imdct36_blocks_float = imdct36_blocks_sse2;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/synth_filter: Remove obsolete SSE function

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Tue Jun 14 20:31:03 2022 +0200| [eb33fd384e70900644b5c1a06e266819af32b02e] | 
committer: Andreas Rheinhardt

avcodec/x86/synth_filter: Remove obsolete SSE function

The only systems which benefit from synth_filter_sse are truly
ancient 32-bit x86s as all other systems use at least the SSE2 versions
(this includes all x64 cpus (which is why this code is restricted
to x86-32)).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=eb33fd384e70900644b5c1a06e266819af32b02e
---

 libavcodec/x86/synth_filter.asm| 6 +-
 libavcodec/x86/synth_filter_init.c | 8 
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/libavcodec/x86/synth_filter.asm b/libavcodec/x86/synth_filter.asm
index bc1a48f409..22f57c3309 100644
--- a/libavcodec/x86/synth_filter.asm
+++ b/libavcodec/x86/synth_filter.asm
@@ -115,7 +115,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * 
ARCH_X86_64, \
   synth_buf, synth_buf2, window, out, off, scale
 %define scale m0
 %if ARCH_X86_32 || WIN64
-%if cpuflag(sse2) && notcpuflag(avx)
+%if notcpuflag(avx)
 movd   scale, scalem
 SPLATDm0
 %else
@@ -234,10 +234,6 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 
* ARCH_X86_64, \
 RET
 %endmacro
 
-%if ARCH_X86_32
-INIT_XMM sse
-SYNTH_FILTER
-%endif
 INIT_XMM sse2
 SYNTH_FILTER
 INIT_YMM avx
diff --git a/libavcodec/x86/synth_filter_init.c 
b/libavcodec/x86/synth_filter_init.c
index 35e2b47a3e..7c76ac8d05 100644
--- a/libavcodec/x86/synth_filter_init.c
+++ b/libavcodec/x86/synth_filter_init.c
@@ -43,9 +43,6 @@ static void synth_filter_##opt(FFTContext *imdct, 
 \
 }  
\
 
 #if HAVE_X86ASM
-#if ARCH_X86_32
-SYNTH_FILTER_FUNC(sse)
-#endif
 SYNTH_FILTER_FUNC(sse2)
 SYNTH_FILTER_FUNC(avx)
 SYNTH_FILTER_FUNC(fma3)
@@ -56,11 +53,6 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
 #if HAVE_X86ASM
 int cpu_flags = av_get_cpu_flags();
 
-#if ARCH_X86_32
-if (EXTERNAL_SSE(cpu_flags)) {
-s->synth_filter_float = synth_filter_sse;
-}
-#endif
 if (EXTERNAL_SSE2(cpu_flags)) {
 s->synth_filter_float = synth_filter_sse2;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/vp6dsp: Remove obsolete MMX ff_vp6_filter_diag4_mmx

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Mon Jun 13 17:02:42 2022 +0200| [6cb3ee80b3b58d692a722fb38ee05f170ae8b0d2] | 
committer: Andreas Rheinhardt

avcodec/x86/vp6dsp: Remove obsolete MMX ff_vp6_filter_diag4_mmx

The only systems which benefit from it are truly
ancient 32-bit x86s, as all other systems use at least the SSE2 versions
(this includes all x64 CPUs, which is why this code is restricted
to x86-32).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6cb3ee80b3b58d692a722fb38ee05f170ae8b0d2
---

 libavcodec/x86/vp6dsp.asm| 16 +---
 libavcodec/x86/vp6dsp_init.c |  7 ---
 2 files changed, 1 insertion(+), 22 deletions(-)

diff --git a/libavcodec/x86/vp6dsp.asm b/libavcodec/x86/vp6dsp.asm
index 0be531e5c2..512fe89def 100644
--- a/libavcodec/x86/vp6dsp.asm
+++ b/libavcodec/x86/vp6dsp.asm
@@ -114,18 +114,13 @@ SECTION .text
 %endif ; mmsize == 8/16
 %endmacro
 
-%macro vp6_filter_diag4 0
 ; void ff_vp6_filter_diag4_(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
 ;const int16_t h_weight[4], const int16_t 
v_weights[4])
+INIT_XMM sse2
 cglobal vp6_filter_diag4, 5, 7, 8
 mov  r5, rsp ; backup stack pointer
 and rsp, ~(mmsize-1) ; align stack
-%if mmsize == 16
 sub rsp, 8*11
-%else
-sub rsp, 8*15
-movq m6, [pw_64]
-%endif
 
 sub  r1, r2
 
@@ -156,12 +151,3 @@ cglobal vp6_filter_diag4, 5, 7, 8
 
 mov rsp, r5  ; restore stack pointer
 RET
-%endmacro
-
-%if ARCH_X86_32
-INIT_MMX mmx
-vp6_filter_diag4
-%endif
-
-INIT_XMM sse2
-vp6_filter_diag4
diff --git a/libavcodec/x86/vp6dsp_init.c b/libavcodec/x86/vp6dsp_init.c
index ce498931d0..83d45ec36c 100644
--- a/libavcodec/x86/vp6dsp_init.c
+++ b/libavcodec/x86/vp6dsp_init.c
@@ -25,8 +25,6 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/vp56dsp.h"
 
-void ff_vp6_filter_diag4_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
- const int16_t *h_weights,const int16_t 
*v_weights);
 void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
   const int16_t *h_weights,const int16_t 
*v_weights);
 
@@ -34,11 +32,6 @@ av_cold void ff_vp6dsp_init_x86(VP56DSPContext *c)
 {
 int cpu_flags = av_get_cpu_flags();
 
-#if ARCH_X86_32
-if (EXTERNAL_MMX(cpu_flags)) {
-c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
-}
-#endif
 if (EXTERNAL_SSE2(cpu_flags)) {
 c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/rv34dsp: Remove obsolete MMX function

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Tue Jun 14 20:34:24 2022 +0200| [8360354ae81982d6510fa54979c23f714b0790e2] | 
committer: Andreas Rheinhardt

avcodec/x86/rv34dsp: Remove obsolete MMX function

The only systems which benefit from ff_rv34_idct_dc_add_mmx are truly
ancient 32-bit x86s, as all other systems use at least the SSE2 versions
(this includes all x64 CPUs, which is why this code is restricted
to x86-32).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8360354ae81982d6510fa54979c23f714b0790e2
---

 libavcodec/x86/rv34dsp.asm| 37 -
 libavcodec/x86/rv34dsp_init.c |  3 ---
 2 files changed, 40 deletions(-)

diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm
index 5568ddfdf8..0a3d99c53f 100644
--- a/libavcodec/x86/rv34dsp.asm
+++ b/libavcodec/x86/rv34dsp.asm
@@ -56,43 +56,6 @@ cglobal rv34_idct_dc_noround, 1, 2, 0
 movq[r0+24], m0
 REP_RET
 
-; ff_rv34_idct_dc_add_mmx(uint8_t *dst, int stride, int dc);
-%if ARCH_X86_32
-INIT_MMX mmx
-cglobal rv34_idct_dc_add, 3, 3
-; calculate DC
-IDCT_DC_ROUND r2
-pxor   m1, m1
-movd   m0, r2d
-psubw  m1, m0
-packuswb   m0, m0
-packuswb   m1, m1
-punpcklbw  m0, m0
-punpcklbw  m1, m1
-punpcklwd  m0, m0
-punpcklwd  m1, m1
-
-; add DC
-lear2, [r0+r1*2]
-movh   m2, [r0]
-movh   m3, [r0+r1]
-movh   m4, [r2]
-movh   m5, [r2+r1]
-paddusbm2, m0
-paddusbm3, m0
-paddusbm4, m0
-paddusbm5, m0
-psubusbm2, m1
-psubusbm3, m1
-psubusbm4, m1
-psubusbm5, m1
-movh   [r0], m2
-movh   [r0+r1], m3
-movh   [r2], m4
-movh   [r2+r1], m5
-RET
-%endif
-
 ; Load coeffs and perform row transform
 ; Output: coeffs in mm[0467], rounder in mm5
 %macro ROW_TRANSFORM  1
diff --git a/libavcodec/x86/rv34dsp_init.c b/libavcodec/x86/rv34dsp_init.c
index 7310122458..caa5c2d653 100644
--- a/libavcodec/x86/rv34dsp_init.c
+++ b/libavcodec/x86/rv34dsp_init.c
@@ -26,7 +26,6 @@
 
 void ff_rv34_idct_dc_mmxext(int16_t *block);
 void ff_rv34_idct_dc_noround_mmxext(int16_t *block);
-void ff_rv34_idct_dc_add_mmx(uint8_t *dst, ptrdiff_t stride, int dc);
 void ff_rv34_idct_dc_add_sse2(uint8_t *dst, ptrdiff_t stride, int dc);
 void ff_rv34_idct_dc_add_sse4(uint8_t *dst, ptrdiff_t stride, int dc);
 void ff_rv34_idct_add_mmxext(uint8_t *dst, ptrdiff_t stride, int16_t *block);
@@ -35,8 +34,6 @@ av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags))
-c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
 if (EXTERNAL_MMXEXT(cpu_flags)) {
 c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmxext;
 c->rv34_idct_add = ff_rv34_idct_add_mmxext;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/dcadsp: Remove obsolete SSE function

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Mon Jun 20 03:07:41 2022 +0200| [61e3cccd367a1daf4aedffa65f5be038aa5cebe1] | 
committer: Andreas Rheinhardt

avcodec/x86/dcadsp: Remove obsolete SSE function

The only systems which benefit from ff_lfe_fir0_float_sse are truly
ancient 32-bit x86s, as all other systems use at least the SSE2 versions
(this includes all x64 CPUs, which is why this code is restricted
to x86-32).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=61e3cccd367a1daf4aedffa65f5be038aa5cebe1
---

 libavcodec/x86/dcadsp.asm| 17 ++---
 libavcodec/x86/dcadsp_init.c |  3 ---
 2 files changed, 2 insertions(+), 18 deletions(-)

diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 055361a765..de9fd6f346 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -42,22 +42,13 @@ cglobal lfe_fir0_float, 4, 6, 12 + cpuflag(fma3)*4, 
samples, lfe, coeff, nblocks
 cvtdq2ps  m5, [lfeq   ]
 shufpsm7, m4, m4, q0123
 shufpsm6, m5, m5, q0123
-%elif cpuflag(sse2)
+%else
 movu  m4, [lfeq+16]
 movu  m5, [lfeq   ]
 cvtdq2ps  m4, m4
 cvtdq2ps  m5, m5
 pshufdm7, m4, q0123
 pshufdm6, m5, q0123
-%else
-cvtpi2ps  m4, [lfeq+16]
-cvtpi2ps  m0, [lfeq+24]
-cvtpi2ps  m5, [lfeq   ]
-cvtpi2ps  m1, [lfeq+8 ]
-shufpsm4, m0, q1010
-shufpsm5, m1, q1010
-shufpsm7, m4, m4, q0123
-shufpsm6, m5, m5, q0123
 %endif
 
 .inner_loop:
@@ -206,10 +197,6 @@ cglobal lfe_fir0_float, 4, 6, 12 + cpuflag(fma3)*4, 
samples, lfe, coeff, nblocks
 RET
 %endmacro
 
-%if ARCH_X86_32
-INIT_XMM sse
-LFE_FIR0_FLOAT
-%endif
 INIT_XMM sse2
 LFE_FIR0_FLOAT
 %if HAVE_AVX_EXTERNAL
@@ -235,7 +222,7 @@ cglobal lfe_fir1_float, 4, 6, 10, samples, lfe, coeff, 
nblocks, cnt1, cnt2
 %if cpuflag(avx)
 cvtdq2ps  m4, [lfeq]
 shufpsm5, m4, m4, q0123
-%elif cpuflag(sse2)
+%else
 movu  m4, [lfeq]
 cvtdq2ps  m4, m4
 pshufdm5, m4, q0123
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index fc10fb8bc5..0c78dd1c9e 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -27,7 +27,6 @@ void ff_lfe_fir0_float_##opt(float *pcm_samples, int32_t 
*lfe_samples, \
 void ff_lfe_fir1_float_##opt(float *pcm_samples, int32_t *lfe_samples, 
\
  const float *filter_coeff, ptrdiff_t npcmblocks);
 
-LFE_FIR_FLOAT_FUNC(sse)
 LFE_FIR_FLOAT_FUNC(sse2)
 LFE_FIR_FLOAT_FUNC(sse3)
 LFE_FIR_FLOAT_FUNC(avx)
@@ -37,8 +36,6 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (ARCH_X86_32 && EXTERNAL_SSE(cpu_flags))
-s->lfe_fir_float[0] = ff_lfe_fir0_float_sse;
 if (EXTERNAL_SSE2(cpu_flags))
 s->lfe_fir_float[0] = ff_lfe_fir0_float_sse2;
 if (EXTERNAL_SSE3(cpu_flags))

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/huffyuvdsp: Remove obsolete MMX functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Mon Jun 20 06:41:42 2022 +0200| [4b6ffc2880e33d05ed1ab6bbc38e5a795f14b504] | 
committer: Andreas Rheinhardt

avcodec/x86/huffyuvdsp: Remove obsolete MMX functions

The only systems which benefit from these are truly
ancient 32-bit x86s, as all other systems use at least the SSE2 versions
(this includes all x64 CPUs, which is why this code is restricted
to x86-32).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4b6ffc2880e33d05ed1ab6bbc38e5a795f14b504
---

 libavcodec/x86/huffyuvdsp.asm| 23 +--
 libavcodec/x86/huffyuvdsp_init.c |  8 
 2 files changed, 1 insertion(+), 30 deletions(-)

diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index a1231f1b22..c5c40e991b 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -32,24 +32,15 @@ SECTION .text
 
 %macro ADD_INT16 0
 cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
-%if mmsize > 8
 test srcq, mmsize-1
 jnz .unaligned
 test dstq, mmsize-1
 jnz .unaligned
-%endif
 INT16_LOOP a, add
-%if mmsize > 8
 .unaligned:
 INT16_LOOP u, add
-%endif
 %endmacro
 
-%if ARCH_X86_32
-INIT_MMX mmx
-ADD_INT16
-%endif
-
 INIT_XMM sse2
 ADD_INT16
 
@@ -60,7 +51,7 @@ ADD_INT16
 
 ; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src,
 ;   intptr_t w, uint8_t *left)
-%macro LEFT_BGR32 0
+INIT_XMM sse2
 cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left
 shl   wq, 2
 movd  m0, [leftq]
@@ -71,17 +62,12 @@ cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left
 .loop:
 movu  m1, [srcq+wq]
 mova  m2, m1
-%if mmsize == 8
-punpckhdq m0, m0
-%endif
 LSHIFTm1, 4
 paddb m1, m2
-%if mmsize == 16
 pshufdm0, m0, q
 mova  m2, m1
 LSHIFTm1, 8
 paddb m1, m2
-%endif
 paddb m0, m1
 movu   [dstq+wq], m0
 add   wq, mmsize
@@ -89,14 +75,7 @@ cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left
 movd  m0, [dstq-4]
 movd [leftq], m0
 REP_RET
-%endmacro
 
-%if ARCH_X86_32
-INIT_MMX mmx
-LEFT_BGR32
-%endif
-INIT_XMM sse2
-LEFT_BGR32
 
 ; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, 
const uint8_t *diff, int mask, int w, int *left, int *left_top)
 INIT_MMX mmxext
diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c
index eb10de383d..239d3ca313 100644
--- a/libavcodec/x86/huffyuvdsp_init.c
+++ b/libavcodec/x86/huffyuvdsp_init.c
@@ -26,12 +26,9 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/huffyuvdsp.h"
 
-void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int 
w);
 void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int 
w);
 void ff_add_int16_avx2(uint16_t *dst, const uint16_t *src, unsigned mask, int 
w);
 
-void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src,
- intptr_t w, uint8_t *left);
 void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src,
   intptr_t w, uint8_t *left);
 void ff_add_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *top, 
const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
@@ -41,11 +38,6 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c, 
enum AVPixelFormat pix
 int cpu_flags = av_get_cpu_flags();
 const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(pix_fmt);
 
-if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
-c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_mmx;
-c->add_int16 = ff_add_int16_mmx;
-}
-
 if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) {
 c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/vp8dsp: Remove obsolete MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Mon Jun 20 05:55:04 2022 +0200| [6a551f14050674fb685920eb1b0640810cacccf9] | 
committer: Andreas Rheinhardt

avcodec/x86/vp8dsp: Remove obsolete MMX(EXT) functions

The only systems which benefit from these are truly
ancient 32-bit x86s, as all other systems use at least the SSE2 versions
(this includes all x64 CPUs, which is why this code is restricted
to x86-32).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6a551f14050674fb685920eb1b0640810cacccf9
---

 libavcodec/x86/vp8dsp.asm| 119 +---
 libavcodec/x86/vp8dsp_init.c |  84 -
 libavcodec/x86/vp8dsp_loopfilter.asm | 354 +--
 3 files changed, 4 insertions(+), 553 deletions(-)

diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 75de5690a1..1c59e884ed 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -840,25 +840,6 @@ cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, 
srcstride, height
 jg .nextrow
 REP_RET
 
-%if ARCH_X86_32
-INIT_MMX mmx
-cglobal put_vp8_pixels16, 5, 5, 0, dst, dststride, src, srcstride, height
-.nextrow:
-movqmm0, [srcq+srcstrideq*0+0]
-movqmm1, [srcq+srcstrideq*0+8]
-movqmm2, [srcq+srcstrideq*1+0]
-movqmm3, [srcq+srcstrideq*1+8]
-leasrcq, [srcq+srcstrideq*2]
-movq [dstq+dststrideq*0+0], mm0
-movq [dstq+dststrideq*0+8], mm1
-movq [dstq+dststrideq*1+0], mm2
-movq [dstq+dststrideq*1+8], mm3
-leadstq, [dstq+dststrideq*2]
-sub heightd, 2
-jg .nextrow
-REP_RET
-%endif
-
 INIT_XMM sse
 cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height
 .nextrow:
@@ -895,32 +876,6 @@ cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, 
srcstride, height
 %4 [dst2q+strideq+%3], m5
 %endmacro
 
-%if ARCH_X86_32
-INIT_MMX mmx
-cglobal vp8_idct_dc_add, 3, 3, 0, dst, block, stride
-; load data
-movd   m0, [blockq]
-
-; calculate DC
-paddw  m0, [pw_4]
-pxor   m1, m1
-psraw  m0, 3
-movd [blockq], m1
-psubw  m1, m0
-packuswb   m0, m0
-packuswb   m1, m1
-punpcklbw  m0, m0
-punpcklbw  m1, m1
-punpcklwd  m0, m0
-punpcklwd  m1, m1
-
-; add DC
-DEFINE_ARGS dst1, dst2, stride
-lea dst2q, [dst1q+strideq*2]
-ADD_DC m0, m1, 0, movh
-RET
-%endif
-
 %macro VP8_IDCT_DC_ADD 0
 cglobal vp8_idct_dc_add, 3, 3, 6, dst, block, stride
 ; load data
@@ -971,44 +926,6 @@ VP8_IDCT_DC_ADD
 ; void ff_vp8_idct_dc_add4y_(uint8_t *dst, int16_t block[4][16], 
ptrdiff_t stride);
 ;-
 
-%if ARCH_X86_32
-INIT_MMX mmx
-cglobal vp8_idct_dc_add4y, 3, 3, 0, dst, block, stride
-; load data
-movd  m0, [blockq+32*0] ; A
-movd  m1, [blockq+32*2] ; C
-punpcklwd m0, [blockq+32*1] ; A B
-punpcklwd m1, [blockq+32*3] ; C D
-punpckldq m0, m1; A B C D
-pxor  m6, m6
-
-; calculate DC
-paddw m0, [pw_4]
-movd [blockq+32*0], m6
-movd [blockq+32*1], m6
-movd [blockq+32*2], m6
-movd [blockq+32*3], m6
-psraw m0, 3
-psubw m6, m0
-packuswb  m0, m0
-packuswb  m6, m6
-punpcklbw m0, m0 ; AABBCCDD
-punpcklbw m6, m6 ; AABBCCDD
-movq  m1, m0
-movq  m7, m6
-punpcklbw m0, m0 ; 
-punpckhbw m1, m1 ; 
-punpcklbw m6, m6 ; 
-punpckhbw m7, m7 ; 
-
-; add DC
-DEFINE_ARGS dst1, dst2, stride
-leadst2q, [dst1q+strideq*2]
-ADD_DCm0, m6, 0, mova
-ADD_DCm1, m7, 8, mova
-RET
-%endif
-
 INIT_XMM sse2
 cglobal vp8_idct_dc_add4y, 3, 3, 6, dst, block, stride
 ; load data
@@ -1117,7 +1034,7 @@ cglobal vp8_idct_dc_add4uv, 3, 3, 0, dst, block, stride
 SWAP %4,  %3
 %endmacro
 
-%macro VP8_IDCT_ADD 0
+INIT_MMX sse
 cglobal vp8_idct_add, 3, 3, 0, dst, block, stride
 ; load block data
 movq m0, [blockq+ 0]
@@ -1126,17 +1043,9 @@ cglobal vp8_idct_add, 3, 3, 0, dst, block, stride
 movq m3, [blockq+24]
 movq m6, [pw_20091]
 movq m7, [pw_17734]
-%if cpuflag(sse)
 xorps  xmm0, xmm0
 movaps [blockq+ 0], xmm0
 movaps [blockq+16], xmm0
-%else
-pxor m4, m4
-movq [blockq+ 0], m4
-movq [blockq+ 8], m4
-movq [blockq+16], m4
-movq [blockq+24], m4
-%endif
 
 ; actual IDCT
 VP8_IDCT_TRANSFORM4x4_1D 0, 1, 2, 3, 4, 5
@@ -1153,14 +1062,6 @@ cglobal vp8_idct_add, 3, 3, 0, dst, block, stride
 STORE_DIFFx2 m2, m3, m6, m7, m4, 3, dst2q, strideq
 
 RET
-%endmacro
-
-%if ARCH_X86_32
-INIT_MMX mmx
-VP8_IDCT_ADD
-%endif
-INIT_MMX sse
-VP8_IDCT_ADD
 
 ;-
 ; void ff_vp8_luma_dc_wht(int16_t block[4][4][16], int16_t dc[16])
@@ -1193,23 +1094,15 @@ VP8_IDCT_ADD
   

[FFmpeg-cvslog] avcodec/x86/dirac_dwt: Remove obsolete MMX functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Mon Jun 20 07:08:31 2022 +0200| [5e332fe35cd336a5c7718d5e9a5a93ece0e61a3a] | 
committer: Andreas Rheinhardt

avcodec/x86/dirac_dwt: Remove obsolete MMX functions

The only systems which benefit from these are truly
ancient 32-bit x86s, as all other systems use at least the SSE2 versions
(this includes all x64 CPUs, which is why this code is restricted
to x86-32).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5e332fe35cd336a5c7718d5e9a5a93ece0e61a3a
---

 libavcodec/x86/dirac_dwt.asm|  8 
 libavcodec/x86/dirac_dwt_init.c | 31 ---
 2 files changed, 39 deletions(-)

diff --git a/libavcodec/x86/dirac_dwt.asm b/libavcodec/x86/dirac_dwt.asm
index 22a5c2..6c8b3c0d88 100644
--- a/libavcodec/x86/dirac_dwt.asm
+++ b/libavcodec/x86/dirac_dwt.asm
@@ -293,14 +293,6 @@ cglobal horizontal_compose_dd97i_ssse3, 3,6,8, b, tmp, w, 
x, w2, b_w2
 REP_RET
 
 
-%if ARCH_X86_64 == 0
-INIT_MMX
-COMPOSE_VERTICAL mmx
-HAAR_HORIZONTAL mmx, 0
-HAAR_HORIZONTAL mmx, 1
-%endif
-
-;;INIT_XMM
 INIT_XMM
 COMPOSE_VERTICAL sse2
 HAAR_HORIZONTAL sse2, 0
diff --git a/libavcodec/x86/dirac_dwt_init.c b/libavcodec/x86/dirac_dwt_init.c
index 49a6380add..9200618283 100644
--- a/libavcodec/x86/dirac_dwt_init.c
+++ b/libavcodec/x86/dirac_dwt_init.c
@@ -134,9 +134,6 @@ static void horizontal_compose_haar1i##ext(uint8_t *_b, 
uint8_t *_tmp, int w)\
 \
 
 #if HAVE_X86ASM
-#if !ARCH_X86_64
-COMPOSE_VERTICAL(_mmx, 4)
-#endif
 COMPOSE_VERTICAL(_sse2, 8)
 
 
@@ -163,34 +160,6 @@ void ff_spatial_idwt_init_x86(DWTContext *d, enum dwt_type 
type)
 #if HAVE_X86ASM
   int mm_flags = av_get_cpu_flags();
 
-#if !ARCH_X86_64
-if (!(mm_flags & AV_CPU_FLAG_MMX))
-return;
-
-switch (type) {
-case DWT_DIRAC_DD9_7:
-d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
-d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
-break;
-case DWT_DIRAC_LEGALL5_3:
-d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
-d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_mmx;
-break;
-case DWT_DIRAC_DD13_7:
-d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_mmx;
-d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
-break;
-case DWT_DIRAC_HAAR0:
-d->vertical_compose   = (void*)vertical_compose_haar_mmx;
-d->horizontal_compose = horizontal_compose_haar0i_mmx;
-break;
-case DWT_DIRAC_HAAR1:
-d->vertical_compose   = (void*)vertical_compose_haar_mmx;
-d->horizontal_compose = horizontal_compose_haar1i_mmx;
-break;
-}
-#endif
-
 if (!(mm_flags & AV_CPU_FLAG_SSE2))
 return;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/huffyuvencdsp: Remove obsolete MMX function

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Mon Jun 20 07:14:47 2022 +0200| [839fbe0e98881f020e41dc7151d08f2ccb314398] | 
committer: Andreas Rheinhardt

avcodec/x86/huffyuvencdsp: Remove obsolete MMX function

The only systems which benefit from ff_diff_int16_mmx are truly
ancient 32-bit x86s, as all other systems use at least the SSE2 versions
(this includes all x64 CPUs, which is why this code is restricted
to x86-32).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=839fbe0e98881f020e41dc7151d08f2ccb314398
---

 libavcodec/x86/huffyuvencdsp.asm| 9 -
 libavcodec/x86/huffyuvencdsp_init.c | 6 --
 2 files changed, 15 deletions(-)

diff --git a/libavcodec/x86/huffyuvencdsp.asm b/libavcodec/x86/huffyuvencdsp.asm
index d994fd0fd6..8bfd0face0 100644
--- a/libavcodec/x86/huffyuvencdsp.asm
+++ b/libavcodec/x86/huffyuvencdsp.asm
@@ -36,26 +36,17 @@ SECTION .text
 
 %macro DIFF_INT16 0
 cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
-%if mmsize > 8
 test src1q, mmsize-1
 jnz .unaligned
 test src2q, mmsize-1
 jnz .unaligned
 test dstq, mmsize-1
 jnz .unaligned
-%endif
 INT16_LOOP a, sub
-%if mmsize > 8
 .unaligned:
 INT16_LOOP u, sub
-%endif
 %endmacro
 
-%if ARCH_X86_32
-INIT_MMX mmx
-DIFF_INT16
-%endif
-
 INIT_XMM sse2
 DIFF_INT16
 
diff --git a/libavcodec/x86/huffyuvencdsp_init.c 
b/libavcodec/x86/huffyuvencdsp_init.c
index 6c6e068cf8..cc6dc5a560 100644
--- a/libavcodec/x86/huffyuvencdsp_init.c
+++ b/libavcodec/x86/huffyuvencdsp_init.c
@@ -28,8 +28,6 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/huffyuvencdsp.h"
 
-void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t 
*src2,
-unsigned mask, int w);
 void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t 
*src2,
 unsigned mask, int w);
 void ff_diff_int16_avx2(uint16_t *dst, const uint16_t *src1, const uint16_t 
*src2,
@@ -42,10 +40,6 @@ av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext 
*c, AVCodecContext *
 av_unused int cpu_flags = av_get_cpu_flags();
 const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt);
 
-if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
-c->diff_int16 = ff_diff_int16_mmx;
-}
-
 if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) {
 c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/lossless_videoencdsp: Remove obsolete MMX function

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Mon Jun 20 07:20:40 2022 +0200| [230ea38de143368729ee1cce47b3a87fbafad8e4] | 
committer: Andreas Rheinhardt

avcodec/x86/lossless_videoencdsp: Remove obsolete MMX function

The only systems which benefit from ff_diff_bytes_mmx are truly
ancient 32-bit x86s, as all other systems use at least the SSE2 versions
(this includes all x64 CPUs, which is why this code is restricted
to x86-32).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=230ea38de143368729ee1cce47b3a87fbafad8e4
---

 libavcodec/x86/lossless_videoencdsp.asm| 9 -
 libavcodec/x86/lossless_videoencdsp_init.c | 6 --
 2 files changed, 15 deletions(-)

diff --git a/libavcodec/x86/lossless_videoencdsp.asm 
b/libavcodec/x86/lossless_videoencdsp.asm
index fb1204f0f1..2e1d01bc2c 100644
--- a/libavcodec/x86/lossless_videoencdsp.asm
+++ b/libavcodec/x86/lossless_videoencdsp.asm
@@ -113,15 +113,6 @@ cglobal diff_bytes, 4,5,2, dst, src1, src2, w
 REP_RET
 %endmacro
 
-%if ARCH_X86_32
-INIT_MMX mmx
-DIFF_BYTES_PROLOGUE
-%define regsize mmsize
-DIFF_BYTES_LOOP_PREP .skip_main_aa, .end_aa
-DIFF_BYTES_BODYa, a
-%undef i
-%endif
-
 INIT_XMM sse2
 DIFF_BYTES_PROLOGUE
 %define regsize mmsize
diff --git a/libavcodec/x86/lossless_videoencdsp_init.c 
b/libavcodec/x86/lossless_videoencdsp_init.c
index 40407add52..b3efcfdcd7 100644
--- a/libavcodec/x86/lossless_videoencdsp_init.c
+++ b/libavcodec/x86/lossless_videoencdsp_init.c
@@ -29,8 +29,6 @@
 #include "libavcodec/lossless_videoencdsp.h"
 #include "libavcodec/mathops.h"
 
-void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
-   intptr_t w);
 void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
 intptr_t w);
 void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
@@ -87,10 +85,6 @@ av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c)
 {
 av_unused int cpu_flags = av_get_cpu_flags();
 
-if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
-c->diff_bytes = ff_diff_bytes_mmx;
-}
-
 #if HAVE_INLINE_ASM
 if (INLINE_MMXEXT(cpu_flags)) {
 c->sub_median_pred = sub_median_pred_mmxext;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/x86/lossless_videodsp: Remove obsolete MMX(EXT) functions

2022-06-22 Thread Andreas Rheinhardt
ffmpeg | branch: master | Andreas Rheinhardt  | 
Mon Jun 20 07:31:42 2022 +0200| [fed07efcde72824ac1ada80d4af4e91ac4fcfc14] | 
committer: Andreas Rheinhardt

avcodec/x86/lossless_videodsp: Remove obsolete MMX(EXT) functions

The only systems which benefit from these are truly
ancient 32-bit x86s, as all other systems use at least the SSE2 versions
(this includes all x64 CPUs, which is why this code is restricted
to x86-32).

Signed-off-by: Andreas Rheinhardt 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fed07efcde72824ac1ada80d4af4e91ac4fcfc14
---

 libavcodec/x86/lossless_videodsp.asm| 20 +++-
 libavcodec/x86/lossless_videodsp_init.c | 58 -
 2 files changed, 4 insertions(+), 74 deletions(-)

diff --git a/libavcodec/x86/lossless_videodsp.asm 
b/libavcodec/x86/lossless_videodsp.asm
index 0a1b7091c9..eb1b80506e 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -38,11 +38,11 @@ pb_67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 
7, 6, 7, 6, 7
 SECTION .text
 
 ;--
-; void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
-;const uint8_t *diff, int w,
-;int *left, int *left_top)
+; void ff_add_median_pred(uint8_t *dst, const uint8_t *top,
+; const uint8_t *diff, int w,
+; int *left, int *left_top)
 ;--
-%macro MEDIAN_PRED 0
+INIT_XMM sse2
 cglobal add_median_pred, 6,6,8, dst, top, diff, w, left, left_top
 movum0, [topq]
 movam2, m0
@@ -100,14 +100,6 @@ cglobal add_median_pred, 6,6,8, dst, top, diff, w, left, 
left_top
 movzx   r2d, byte [topq-1]
 mov [left_topq], r2d
 RET
-%endmacro
-
-%if ARCH_X86_32
-INIT_MMX mmxext
-MEDIAN_PRED
-%endif
-INIT_XMM sse2
-MEDIAN_PRED
 
 
 %macro ADD_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
@@ -240,10 +232,6 @@ cglobal add_bytes, 3,4,2, dst, src, w, size
 REP_RET
 %endmacro
 
-%if ARCH_X86_32
-INIT_MMX mmx
-ADD_BYTES
-%endif
 INIT_XMM sse2
 ADD_BYTES
 
diff --git a/libavcodec/x86/lossless_videodsp_init.c 
b/libavcodec/x86/lossless_videodsp_init.c
index 6d71f14e7f..5690cacaad 100644
--- a/libavcodec/x86/lossless_videodsp_init.c
+++ b/libavcodec/x86/lossless_videodsp_init.c
@@ -19,17 +19,12 @@
  */
 
 #include "config.h"
-#include "libavutil/x86/asm.h"
 #include "../lossless_videodsp.h"
 #include "libavutil/x86/cpu.h"
 
-void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t w);
 void ff_add_bytes_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t w);
 void ff_add_bytes_avx2(uint8_t *dst, uint8_t *src, ptrdiff_t w);
 
-void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
-   const uint8_t *diff, ptrdiff_t w,
-   int *left, int *left_top);
 void ff_add_median_pred_sse2(uint8_t *dst, const uint8_t *top,
  const uint8_t *diff, ptrdiff_t w,
  int *left, int *left_top);
@@ -47,63 +42,10 @@ int ff_add_left_pred_int16_unaligned_ssse3(uint16_t *dst, 
const uint16_t *src, u
 void ff_add_gradient_pred_ssse3(uint8_t *src, const ptrdiff_t stride, const 
ptrdiff_t width);
 void ff_add_gradient_pred_avx2(uint8_t *src, const ptrdiff_t stride, const 
ptrdiff_t width);
 
-#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
-static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top,
- const uint8_t *diff, ptrdiff_t w,
- int *left, int *left_top)
-{
-x86_reg w2 = -w;
-x86_reg x;
-int l  = *left & 0xff;
-int tl = *left_top & 0xff;
-int t;
-__asm__ volatile (
-"mov  %7, %3\n"
-"1: \n"
-"movzbl (%3, %4), %2\n"
-"mov  %2, %k3   \n"
-"sub %b1, %b3   \n"
-"add %b0, %b3   \n"
-"mov  %2, %1\n"
-"cmp  %0, %2\n"
-"cmovg%0, %2\n"
-"cmovg%1, %0\n"
-"cmp %k3, %0\n"
-"cmovg   %k3, %0\n"
-"mov  %7, %3\n"
-"cmp  %2, %0\n"
-"cmovl%2, %0\n"
-"add(%6, %4), %b0   \n"
-"mov %b0, (%5, %4)  \n"
-"inc  %4\n"
-"jl   1b\n"
-: "+&q"(l), "+&q"(tl), "=&r"(t), "=&q"(x), "+&r"(w2)
-: "r"(dst + w), "r"(diff + w), "rm"(top + w)
-);
-*left = l;
-*left_top = tl;
-}
-#endif
-
 void ff_llviddsp_init_x86(LLVidDSPContext *c)
 {
 int cpu_flags = av_get_cpu_flags();