[FFmpeg-cvslog] avcodec/x86/ac3dsp_init: Remove obsolete 3dnow, MMX(EXT), SSE functions
ffmpeg | branch: master | Andreas Rheinhardt | Tue Jun 7 00:55:25 2022 +0200| [fd98594a8831ce037a495b6d7e090bd8f81e83a1] | committer: Andreas Rheinhardt avcodec/x86/ac3dsp_init: Remove obsolete 3dnow, MMX(EXT), SSE functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fd98594a8831ce037a495b6d7e090bd8f81e83a1 --- libavcodec/x86/ac3dsp.asm| 61 libavcodec/x86/ac3dsp_init.c | 18 - 2 files changed, 79 deletions(-) diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index 4ddaa94320..c11a94ca93 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -63,14 +63,7 @@ cglobal ac3_exponent_min, 3, 4, 2, exp, reuse_blks, expn, offset REP_RET %endmacro -%define LOOP_ALIGN -INIT_MMX mmx -AC3_EXPONENT_MIN -%if HAVE_MMXEXT_EXTERNAL %define LOOP_ALIGN ALIGN 16 -INIT_MMX mmxext -AC3_EXPONENT_MIN -%endif %if HAVE_SSE2_EXTERNAL INIT_XMM sse2 AC3_EXPONENT_MIN @@ -81,60 +74,6 @@ AC3_EXPONENT_MIN ; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len) ;- -; The 3DNow! version is not bit-identical because pf2id uses truncation rather -; than round-to-nearest. 
-INIT_MMX 3dnow -cglobal float_to_fixed24, 3, 3, 0, dst, src, len -movq m0, [pf_1_24] -.loop: -movq m1, [srcq ] -movq m2, [srcq+8 ] -movq m3, [srcq+16] -movq m4, [srcq+24] -pfmul m1, m0 -pfmul m2, m0 -pfmul m3, m0 -pfmul m4, m0 -pf2id m1, m1 -pf2id m2, m2 -pf2id m3, m3 -pf2id m4, m4 -movq [dstq ], m1 -movq [dstq+8 ], m2 -movq [dstq+16], m3 -movq [dstq+24], m4 -add srcq, 32 -add dstq, 32 -sub lend, 8 -ja .loop -femms -RET - -INIT_XMM sse -cglobal float_to_fixed24, 3, 3, 3, dst, src, len -movaps m0, [pf_1_24] -.loop: -movaps m1, [srcq ] -movaps m2, [srcq+16] -mulps m1, m0 -mulps m2, m0 -cvtps2pi mm0, m1 -movhlpsm1, m1 -cvtps2pi mm1, m1 -cvtps2pi mm2, m2 -movhlpsm2, m2 -cvtps2pi mm3, m2 -movq [dstq ], mm0 -movq [dstq+ 8], mm1 -movq [dstq+16], mm2 -movq [dstq+24], mm3 -add srcq, 32 -add dstq, 32 -sub lend, 8 -ja .loop -emms -RET - INIT_XMM sse2 cglobal float_to_fixed24, 3, 3, 9, dst, src, len movaps m0, [pf_1_24] diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c index 5f20e6dc31..75a341bc95 100644 --- a/libavcodec/x86/ac3dsp_init.c +++ b/libavcodec/x86/ac3dsp_init.c @@ -24,12 +24,8 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/ac3dsp.h" -void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int nb_coefs); -void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs); void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs); -void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned int len); -void ff_float_to_fixed24_sse (int32_t *dst, const float *src, unsigned int len); void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len); int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]); @@ -41,20 +37,6 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) { int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_MMX(cpu_flags)) { -c->ac3_exponent_min = ff_ac3_exponent_min_mmx; -} -if (EXTERNAL_AMD3DNOW(cpu_flags)) { -if 
(!bit_exact) { -c->float_to_fixed24 = ff_float_to_fixed24_3dnow; -} -} -if (EXTERNAL_MMXEXT(cpu_flags)) { -c->ac3_exponent_min = ff_ac3_exponent_min_mmxext; -} -if (EXTERNAL_SSE(cpu_flags)) { -c->float_to_fixed24 = ff_float_to_fixed24_sse; -} if (EXTERNAL_SSE2(cpu_flags)) { c->ac3_exponent_min = ff_ac3_exponent_min_sse2; c->float_to_fixed24 = ff_float_to_fixed24_sse2; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/vc1dsp_init: Remove obsolete 3dnow, MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Tue Jun 7 00:40:04 2022 +0200| [e02ffed004caf192c6307813e7b178c3044993c6] | committer: Andreas Rheinhardt avcodec/x86/vc1dsp_init: Remove obsolete 3dnow, MMX(EXT) functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e02ffed004caf192c6307813e7b178c3044993c6 --- libavcodec/x86/h264_chromamc.asm | 1 - libavcodec/x86/vc1dsp_init.c | 40 +--- libavcodec/x86/vc1dsp_loopfilter.asm | 36 3 files changed, 18 insertions(+), 59 deletions(-) diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm index b5a78b537d..c7c4a2d4aa 100644 --- a/libavcodec/x86/h264_chromamc.asm +++ b/libavcodec/x86/h264_chromamc.asm @@ -448,7 +448,6 @@ chroma_mc2_mmx_func avg, h264 INIT_MMX 3dnow chroma_mc8_mmx_func avg, h264, _rnd -chroma_mc8_mmx_func avg, vc1, _nornd chroma_mc8_mmx_func avg, rv40 chroma_mc4_mmx_func avg, h264 chroma_mc4_mmx_func avg, rv40 diff --git a/libavcodec/x86/vc1dsp_init.c b/libavcodec/x86/vc1dsp_init.c index 2fbf0b3a74..b1f68b098c 100644 --- a/libavcodec/x86/vc1dsp_init.c +++ b/libavcodec/x86/vc1dsp_init.c @@ -33,9 +33,10 @@ #include "vc1dsp.h" #include "config.h" -#define LOOP_FILTER(EXT) \ +#define LOOP_FILTER4(EXT) \ void ff_vc1_v_loop_filter4_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \ -void ff_vc1_h_loop_filter4_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \ +void ff_vc1_h_loop_filter4_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); +#define LOOP_FILTER816(EXT) \ void ff_vc1_v_loop_filter8_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \ void ff_vc1_h_loop_filter8_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \ \ @@ -52,9 +53,10 @@ 
static void vc1_h_loop_filter16_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq) } #if HAVE_X86ASM -LOOP_FILTER(mmxext) -LOOP_FILTER(sse2) -LOOP_FILTER(ssse3) +LOOP_FILTER4(mmxext) +LOOP_FILTER816(sse2) +LOOP_FILTER4(ssse3) +LOOP_FILTER816(ssse3) void ff_vc1_h_loop_filter8_sse4(uint8_t *src, ptrdiff_t stride, int pq); @@ -72,11 +74,7 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, ptrdiff_t stride, int pq) } DECLARE_FUNCTION(put_, 8, _mmx) -DECLARE_FUNCTION(put_, 16, _mmx) -DECLARE_FUNCTION(avg_, 8, _mmx) -DECLARE_FUNCTION(avg_, 16, _mmx) DECLARE_FUNCTION(avg_, 8, _mmxext) -DECLARE_FUNCTION(avg_, 16, _mmxext) DECLARE_FUNCTION(put_, 16, _sse2) DECLARE_FUNCTION(avg_, 16, _sse2) @@ -86,8 +84,6 @@ void ff_put_vc1_chroma_mc8_nornd_mmx (uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y); void ff_avg_vc1_chroma_mc8_nornd_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y); -void ff_avg_vc1_chroma_mc8_nornd_3dnow(uint8_t *dst, uint8_t *src, - ptrdiff_t stride, int h, int x, int y); void ff_put_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y); void ff_avg_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src, @@ -114,9 +110,10 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) if (EXTERNAL_MMXEXT(cpu_flags)) ff_vc1dsp_init_mmxext(dsp); -#define ASSIGN_LF(EXT) \ +#define ASSIGN_LF4(EXT) \ dsp->vc1_v_loop_filter4 = ff_vc1_v_loop_filter4_ ## EXT; \ -dsp->vc1_h_loop_filter4 = ff_vc1_h_loop_filter4_ ## EXT; \ +dsp->vc1_h_loop_filter4 = ff_vc1_h_loop_filter4_ ## EXT +#define ASSIGN_LF816(EXT) \ dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_ ## EXT; \ dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_ ## EXT; \ dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \ @@ -127,19 +124,12 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx; dsp->put_vc1_mspel_pixels_tab[1][0] = put_vc1_mspel_mc00_8_mmx; 
-dsp->put_vc1_mspel_pixels_tab[0][0] = put_vc1_mspel_mc00_16_mmx; -dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_vc1_mspel_mc00_8_mmx; -dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_vc1_mspel_mc00_16_mmx; -} -if (EXTERNAL_AMD3DNOW(cpu_flags)) { -dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow; } if (EXTERNAL_MMXEXT(cpu_flags)) { -ASSIGN_LF(mmxext); +ASSIGN_LF4(mmxext); dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext; dsp->avg_vc1_mspel_pixels_tab[1][0]
[FFmpeg-cvslog] avcodec/x86/diracdsp: Remove obsolete MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Tue Jun 7 01:37:59 2022 +0200| [d29a9c2aa68fc3eb6d61ff95c698e29316037583] | committer: Andreas Rheinhardt avcodec/x86/diracdsp: Remove obsolete MMX(EXT) functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d29a9c2aa68fc3eb6d61ff95c698e29316037583 --- libavcodec/x86/diracdsp.asm| 8 libavcodec/x86/diracdsp_init.c | 25 - 2 files changed, 33 deletions(-) diff --git a/libavcodec/x86/diracdsp.asm b/libavcodec/x86/diracdsp.asm index 17145baf87..e5e2b11846 100644 --- a/libavcodec/x86/diracdsp.asm +++ b/libavcodec/x86/diracdsp.asm @@ -248,14 +248,6 @@ cglobal add_dirac_obmc%1_%2, 6,6,5, dst, src, stride, obmc, yblen %endm INIT_MMX -%if ARCH_X86_64 == 0 -PUT_RECT mmx -ADD_RECT mmx - -HPEL_FILTER mmx -ADD_OBMC 32, mmx -ADD_OBMC 16, mmx -%endif ADD_OBMC 8, mmx INIT_XMM diff --git a/libavcodec/x86/diracdsp_init.c b/libavcodec/x86/diracdsp_init.c index 8baacf3129..f678759dc0 100644 --- a/libavcodec/x86/diracdsp_init.c +++ b/libavcodec/x86/diracdsp_init.c @@ -22,19 +22,14 @@ #include "libavcodec/diracdsp.h" #include "fpel.h" -void ff_add_rect_clamped_mmx(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int); void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int); void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); -void ff_add_dirac_obmc16_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); -void ff_add_dirac_obmc32_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); void 
ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); -void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); -void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height); @@ -87,16 +82,9 @@ static void OPNAME ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5 }\ } -DIRAC_PIXOP(put, mmx) -DIRAC_PIXOP(avg, mmx) -DIRAC_PIXOP(avg, mmxext) - DIRAC_PIXOP(put, sse2) DIRAC_PIXOP(avg, sse2) -#if !ARCH_X86_64 -HPEL_FILTER(8, mmx) -#endif HPEL_FILTER(16, sse2) #endif // HAVE_X86ASM @@ -108,19 +96,6 @@ void ff_diracdsp_init_x86(DiracDSPContext* c) if (EXTERNAL_MMX(mm_flags)) { c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx; -#if !ARCH_X86_64 -c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx; -c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx; -c->dirac_hpel_filter = dirac_hpel_filter_mmx; -c->add_rect_clamped = ff_add_rect_clamped_mmx; -c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_mmx; -#endif -PIXFUNC(put, 0, mmx); -PIXFUNC(avg, 0, mmx); -} - -if (EXTERNAL_MMXEXT(mm_flags)) { -PIXFUNC(avg, 0, mmxext); } if (EXTERNAL_SSE2(mm_flags)) { ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/audiodsp_init: Remove obsolete MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Tue Jun 7 01:22:52 2022 +0200| [3d716d38abdae1982e84e30becb57458244656bd] | committer: Andreas Rheinhardt avcodec/x86/audiodsp_init: Remove obsolete MMX(EXT) functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3d716d38abdae1982e84e30becb57458244656bd --- libavcodec/x86/audiodsp.asm| 13 + libavcodec/x86/audiodsp_init.c | 10 -- 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm index de395e5fa8..b604b0443c 100644 --- a/libavcodec/x86/audiodsp.asm +++ b/libavcodec/x86/audiodsp.asm @@ -23,8 +23,8 @@ SECTION .text -%macro SCALARPRODUCT 0 ; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order) +INIT_XMM sse2 cglobal scalarproduct_int16, 3,3,3, v1, v2, order add orderd, orderd add v1q, orderq @@ -42,16 +42,7 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order jl .loop HADDD m2, m0 movd eax, m2 -%if mmsize == 8 -emms -%endif RET -%endmacro - -INIT_MMX mmxext -SCALARPRODUCT -INIT_XMM sse2 -SCALARPRODUCT ;- @@ -117,8 +108,6 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len REP_RET %endmacro -INIT_MMX mmx -VECTOR_CLIP_INT32 0, 1, 0, 0 INIT_XMM sse2 VECTOR_CLIP_INT32 6, 1, 0, 0, _int VECTOR_CLIP_INT32 6, 2, 0, 1 diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c index 98e296c264..aa5e43e570 100644 --- a/libavcodec/x86/audiodsp_init.c +++ b/libavcodec/x86/audiodsp_init.c @@ -24,13 +24,9 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/audiodsp.h" -int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, - int order); int32_t 
ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int order); -void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, - int32_t min, int32_t max, unsigned int len); void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len); void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, @@ -44,12 +40,6 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c) { int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_MMX(cpu_flags)) -c->vector_clip_int32 = ff_vector_clip_int32_mmx; - -if (EXTERNAL_MMXEXT(cpu_flags)) -c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; - if (EXTERNAL_SSE(cpu_flags)) c->vector_clipf = ff_vector_clipf_sse; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/mpegvideoenc: Remove obsolete MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Tue Jun 7 02:06:41 2022 +0200| [7284ab789d5fe271b9d6a1666ab5ea6be8724cca] | committer: Andreas Rheinhardt avcodec/x86/mpegvideoenc: Remove obsolete MMX(EXT) functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7284ab789d5fe271b9d6a1666ab5ea6be8724cca --- libavcodec/x86/mpegvideoenc.c | 95 +- libavcodec/x86/mpegvideoenc_template.c | 10 2 files changed, 1 insertion(+), 104 deletions(-) diff --git a/libavcodec/x86/mpegvideoenc.c b/libavcodec/x86/mpegvideoenc.c index 3691cce26c..612e7ff758 100644 --- a/libavcodec/x86/mpegvideoenc.c +++ b/libavcodec/x86/mpegvideoenc.c @@ -42,34 +42,9 @@ DECLARE_ALIGNED(16, static const uint16_t, inv_zigzag_direct16)[64] = { #if HAVE_6REGS -#if HAVE_MMX_INLINE -#define COMPILE_TEMPLATE_MMXEXT 0 -#define COMPILE_TEMPLATE_SSE2 0 -#define COMPILE_TEMPLATE_SSSE3 0 -#define RENAME(a) a ## _mmx -#define RENAME_FDCT(a) a ## _mmx -#include "mpegvideoenc_template.c" -#endif /* HAVE_MMX_INLINE */ - -#if HAVE_MMXEXT_INLINE -#undef COMPILE_TEMPLATE_SSSE3 -#undef COMPILE_TEMPLATE_SSE2 -#undef COMPILE_TEMPLATE_MMXEXT -#define COMPILE_TEMPLATE_MMXEXT 1 -#define COMPILE_TEMPLATE_SSE2 0 -#define COMPILE_TEMPLATE_SSSE3 0 -#undef RENAME -#undef RENAME_FDCT -#define RENAME(a) a ## _mmxext -#define RENAME_FDCT(a) a ## _mmxext -#include "mpegvideoenc_template.c" -#endif /* HAVE_MMXEXT_INLINE */ - #if HAVE_SSE2_INLINE -#undef COMPILE_TEMPLATE_MMXEXT #undef COMPILE_TEMPLATE_SSE2 #undef COMPILE_TEMPLATE_SSSE3 -#define COMPILE_TEMPLATE_MMXEXT 0 #define COMPILE_TEMPLATE_SSE2 1 #define COMPILE_TEMPLATE_SSSE3 0 #undef RENAME @@ -80,10 +55,8 @@ DECLARE_ALIGNED(16, 
static const uint16_t, inv_zigzag_direct16)[64] = { #endif /* HAVE_SSE2_INLINE */ #if HAVE_SSSE3_INLINE -#undef COMPILE_TEMPLATE_MMXEXT #undef COMPILE_TEMPLATE_SSE2 #undef COMPILE_TEMPLATE_SSSE3 -#define COMPILE_TEMPLATE_MMXEXT 0 #define COMPILE_TEMPLATE_SSE2 1 #define COMPILE_TEMPLATE_SSSE3 1 #undef RENAME @@ -96,62 +69,6 @@ DECLARE_ALIGNED(16, static const uint16_t, inv_zigzag_direct16)[64] = { #endif /* HAVE_6REGS */ #if HAVE_INLINE_ASM -#if HAVE_MMX_INLINE -static void denoise_dct_mmx(MpegEncContext *s, int16_t *block){ -const int intra= s->mb_intra; -int *sum= s->dct_error_sum[intra]; -uint16_t *offset= s->dct_offset[intra]; - -s->dct_count[intra]++; - -__asm__ volatile( -"pxor %%mm7, %%mm7 \n\t" -"1: \n\t" -"pxor %%mm0, %%mm0 \n\t" -"pxor %%mm1, %%mm1 \n\t" -"movq (%0), %%mm2 \n\t" -"movq 8(%0), %%mm3 \n\t" -"pcmpgtw %%mm2, %%mm0 \n\t" -"pcmpgtw %%mm3, %%mm1 \n\t" -"pxor %%mm0, %%mm2 \n\t" -"pxor %%mm1, %%mm3 \n\t" -"psubw %%mm0, %%mm2 \n\t" -"psubw %%mm1, %%mm3 \n\t" -"movq %%mm2, %%mm4 \n\t" -"movq %%mm3, %%mm5 \n\t" -"psubusw (%2), %%mm2\n\t" -"psubusw 8(%2), %%mm3 \n\t" -"pxor %%mm0, %%mm2 \n\t" -"pxor %%mm1, %%mm3 \n\t" -"psubw %%mm0, %%mm2 \n\t" -"psubw %%mm1, %%mm3 \n\t" -"movq %%mm2, (%0) \n\t" -"movq %%mm3, 8(%0) \n\t" -"movq %%mm4, %%mm2 \n\t" -"movq %%mm5, %%mm3 \n\t" -"punpcklwd %%mm7, %%mm4 \n\t" -"punpckhwd %%mm7, %%mm2 \n\t" -"punpcklwd %%mm7, %%mm5 \n\t" -"punpckhwd %%mm7, %%mm3 \n\t" -"paddd (%1), %%mm4 \n\t" -"paddd 8(%1), %%mm2 \n\t" -"paddd 16(%1), %%mm5\n\t" -"paddd 24(%1), %%mm3\n\t" -"movq %%mm4, (%1) \n\t" -"movq %%mm2, 8(%1) \n\t" -"movq %%mm5, 16(%1) \n\t" -"movq %%mm3, 24(%1) \n\t" -"add $16, %0\n\t" -"add $32, %1\n\t" -"add $16, %2\n\t" -"cmp %3, %0
[FFmpeg-cvslog] avcodec/x86/cavsdsp: Remove obsolete MMX(EXT), 3dnow functions
ffmpeg | branch: master | Andreas Rheinhardt | Tue Jun 7 13:55:24 2022 +0200| [3221aba87989742ea22b639a7bb4af69f4eaa0e7] | committer: Andreas Rheinhardt avcodec/x86/cavsdsp: Remove obsolete MMX(EXT), 3dnow functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3221aba87989742ea22b639a7bb4af69f4eaa0e7 --- libavcodec/x86/cavsdsp.c| 71 +++-- libavcodec/x86/cavsidct.asm | 62 --- 2 files changed, 4 insertions(+), 129 deletions(-) diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c index f974f93fc0..7ceb51a23c 100644 --- a/libavcodec/x86/cavsdsp.c +++ b/libavcodec/x86/cavsdsp.c @@ -36,16 +36,7 @@ #include "config.h" -#if HAVE_MMX_EXTERNAL - -void ff_cavs_idct8_mmx(int16_t *out, const int16_t *in); - -static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, ptrdiff_t stride) -{ -LOCAL_ALIGNED(16, int16_t, b2, [64]); -ff_cavs_idct8_mmx(b2, block); -ff_add_pixels_clamped_mmx(b2, dst, stride); -} +#if HAVE_SSE2_EXTERNAL void ff_cavs_idct8_sse2(int16_t *out, const int16_t *in); @@ -56,9 +47,9 @@ static void cavs_idct8_add_sse2(uint8_t *dst, int16_t *block, ptrdiff_t stride) ff_add_pixels_clamped_sse2(b2, dst, stride); } -#endif /* HAVE_MMX_EXTERNAL */ +#endif /* HAVE_SSE2_EXTERNAL */ -#if (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) +#if HAVE_MMXEXT_INLINE /* * @@ -326,7 +317,7 @@ static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uin "pavgb " #temp ", " #a " \n\t"\ "mov" #size " " #a ", " #b " \n\t" -#endif /* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */ +#endif /* HAVE_MMXEXT_INLINE */ #if HAVE_MMX_EXTERNAL static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src, @@ 
-335,36 +326,12 @@ static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src, ff_put_pixels8_mmx(dst, src, stride, 8); } -static void avg_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src, -ptrdiff_t stride) -{ -ff_avg_pixels8_mmx(dst, src, stride, 8); -} - static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) { ff_avg_pixels8_mmxext(dst, src, stride, 8); } -static void put_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride) -{ -ff_put_pixels16_mmx(dst, src, stride, 16); -} - -static void avg_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride) -{ -ff_avg_pixels16_mmx(dst, src, stride, 16); -} - -static void avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, const uint8_t *src, -ptrdiff_t stride) -{ -ff_avg_pixels16_mmxext(dst, src, stride, 16); -} - static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) { @@ -382,13 +349,7 @@ static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c, AVCodecContext *avctx) { #if HAVE_MMX_EXTERNAL -c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_mmx; c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx; -c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmx; -c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmx; - -c->cavs_idct8_add = cavs_idct8_add_mmx; -c->idct_perm = FF_IDCT_PERM_TRANSPOSE; #endif /* HAVE_MMX_EXTERNAL */ } @@ -408,25 +369,6 @@ CAVS_MC(avg_, 8, mmxext) CAVS_MC(avg_, 16, mmxext) #endif /* HAVE_MMXEXT_INLINE */ -#if HAVE_AMD3DNOW_INLINE -QPEL_CAVS(put_, PUT_OP, 3dnow) -QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow) - -CAVS_MC(put_, 8, 3dnow) -CAVS_MC(put_, 16,3dnow) -CAVS_MC(avg_, 8, 3dnow) -CAVS_MC(avg_, 16,3dnow) - -static av_cold void cavsdsp_init_3dnow(CAVSDSPContext *c, - AVCodecContext *avctx) -{ -DSPFUNC(put, 0, 16, 3dnow); -DSPFUNC(put, 1, 8, 3dnow); -DSPFUNC(avg, 0, 16, 3dnow); -DSPFUNC(avg, 1, 8, 3dnow); -} -#endif /* HAVE_AMD3DNOW_INLINE */ - av_cold void 
ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx) { av_unused int cpu_flags = av_get_cpu_flags(); @@ -434,10 +376,6 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx) if (X86_MMX(cpu_flags)) cavsdsp_init_mmx(c, avctx); -#if HAVE_AMD3DNOW_INLINE -if (INLINE_AMD3DNOW(cpu_flags)) -cavsdsp_init_3dnow(c,
[FFmpeg-cvslog] avcodec/x86/fdct: Remove obsolete MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Fri Jun 10 15:32:33 2022 +0200| [d402ec6be99dc82e263bad883e7c1c3d957343db] | committer: Andreas Rheinhardt avcodec/x86/fdct: Remove obsolete MMX(EXT) functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d402ec6be99dc82e263bad883e7c1c3d957343db --- libavcodec/tests/x86/dct.c| 6 -- libavcodec/x86/fdct.c | 221 +- libavcodec/x86/fdct.h | 2 - libavcodec/x86/fdctdsp_init.c | 6 -- 4 files changed, 1 insertion(+), 234 deletions(-) diff --git a/libavcodec/tests/x86/dct.c b/libavcodec/tests/x86/dct.c index b332c9642d..6e3d8f7c01 100644 --- a/libavcodec/tests/x86/dct.c +++ b/libavcodec/tests/x86/dct.c @@ -58,12 +58,6 @@ PR_WRAP(avx) #endif static const struct algo fdct_tab_arch[] = { -#if HAVE_MMX_INLINE -{ "MMX",ff_fdct_mmx,FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX }, -#endif -#if HAVE_MMXEXT_INLINE -{ "MMXEXT", ff_fdct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT }, -#endif #if HAVE_SSE2_INLINE { "SSE2", ff_fdct_sse2, FF_IDCT_PERM_NONE, AV_CPU_FLAG_SSE2 }, #endif diff --git a/libavcodec/x86/fdct.c b/libavcodec/x86/fdct.c index 835fcc2b28..f4677ff4be 100644 --- a/libavcodec/x86/fdct.c +++ b/libavcodec/x86/fdct.c @@ -37,7 +37,7 @@ #include "libavutil/x86/asm.h" #include "fdct.h" -#if HAVE_MMX_INLINE +#if HAVE_SSE2_INLINE // // @@ -71,8 +71,6 @@ DECLARE_ALIGNED(16, static const int16_t, ocos_4_16)[8] = { DECLARE_ALIGNED(16, static const int16_t, fdct_one_corr)[8] = { X8(1) }; -DECLARE_ALIGNED(8, static const int32_t, fdct_r_row)[2] = {RND_FRW_ROW, RND_FRW_ROW }; - static const struct { DECLARE_ALIGNED(16, const int32_t, fdct_r_row_sse2)[4]; @@ -82,80 +80,6 @@ static const struct }}; 
//DECLARE_ALIGNED(16, static const long, fdct_r_row_sse2)[4] = {RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW}; -DECLARE_ALIGNED(8, static const int16_t, tab_frw_01234567)[] = { // forward_dct coeff table - 16384, 16384, 22725, 19266, - 16384, 16384, 12873,4520, - 21407,8867, 19266, -4520, - -8867, -21407, -22725, -12873, - 16384, -16384, 12873, -22725, - -16384, 16384,4520, 19266, - 8867, -21407,4520, -12873, - 21407, -8867, 19266, -22725, - - 22725, 22725, 31521, 26722, - 22725, 22725, 17855,6270, - 29692, 12299, 26722, -6270, - -12299, -29692, -31521, -17855, - 22725, -22725, 17855, -31521, - -22725, 22725,6270, 26722, - 12299, -29692,6270, -17855, - 29692, -12299, 26722, -31521, - - 21407, 21407, 29692, 25172, - 21407, 21407, 16819,5906, - 27969, 11585, 25172, -5906, - -11585, -27969, -29692, -16819, - 21407, -21407, 16819, -29692, - -21407, 21407,5906, 25172, - 11585, -27969,5906, -16819, - 27969, -11585, 25172, -29692, - - 19266, 19266, 26722, 22654, - 19266, 19266, 15137,5315, - 25172, 10426, 22654, -5315, - -10426, -25172, -26722, -15137, - 19266, -19266, 15137, -26722, - -19266, 19266,5315, 22654, - 10426, -25172,5315, -15137, - 25172, -10426, 22654, -26722, - - 16384, 16384, 22725, 19266, - 16384, 16384, 12873,4520, - 21407,8867, 19266, -4520, - -8867, -21407, -22725, -12873, - 16384, -16384, 12873, -22725, - -16384, 16384,4520, 19266, - 8867, -21407,4520, -12873, - 21407, -8867, 19266, -22725, - - 19266, 19266, 26722, 22654, - 19266, 19266, 15137,5315, - 25172, 10426, 22654, -5315, - -10426, -25172, -26722, -15137, - 19266, -19266, 15137, -26722, - -19266, 19266,5315, 22654, - 10426, -25172,5315, -15137, - 25172, -10426, 22654, -26722, - - 21407, 21407, 29692, 25172, - 21407, 21407, 16819,5906, - 27969, 11585, 25172, -5906, - -11585, -27969, -29692, -16819, - 21407, -21407, 16819, -29692, - -21407, 21407,5906, 25172, - 11585, -27969,5906, -16819, - 27969, -11585, 25172, -29692, - - 22725, 22725, 31521, 26722, - 22725, 22725, 17855,6270, - 29692, 
12299, 26722, -6270, - -12299, -29692, -31521, -17855, - 22725, -22725, 17855, -31521, - -22725, 22725,6270, 26722, - 12299, -29692,6270, -17855, - 29692, -12299, 26722, -31521, -}; - static const struct { DECLARE_ALIGNED(16, const int16_t, tab_frw_01234567_sse2)[256]; @@ -375,7 +299,6 @@ static av_always_inline void fdct_col_##cpu(const int16_t *in, int16_t *out, int
[FFmpeg-cvslog] avcodec/x86/h264_intrapred: Remove obsolete MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Tue Jun 7 15:48:09 2022 +0200| [9bc527126c0e3d8d57e40b33b08e91eb3adf4575] | committer: Andreas Rheinhardt avcodec/x86/h264_intrapred: Remove obsolete MMX(EXT) functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truly ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9bc527126c0e3d8d57e40b33b08e91eb3adf4575 --- libavcodec/x86/h264_intrapred.asm | 715 +--- libavcodec/x86/h264_intrapred_10bit.asm | 94 + libavcodec/x86/h264_intrapred_init.c| 77 3 files changed, 10 insertions(+), 876 deletions(-) diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm index b36c198fbb..31840a1472 100644 --- a/libavcodec/x86/h264_intrapred.asm +++ b/libavcodec/x86/h264_intrapred.asm @@ -48,22 +48,6 @@ cextern pw_8 ; void ff_pred16x16_vertical_8(uint8_t *src, ptrdiff_t stride) ;- -INIT_MMX mmx -cglobal pred16x16_vertical_8, 2,3 -sub r0, r1 -mov r2, 8 -movq mm0, [r0+0] -movq mm1, [r0+8] -.loop: -movq [r0+r1*1+0], mm0 -movq [r0+r1*1+8], mm1 -movq [r0+r1*2+0], mm0 -movq [r0+r1*2+8], mm1 -lea r0, [r0+r1*2] -dec r2 -jg .loop -REP_RET - INIT_XMM sse cglobal pred16x16_vertical_8, 2,3 sub r0, r1 @@ -114,8 +98,6 @@ cglobal pred16x16_horizontal_8, 2,3 REP_RET %endmacro -INIT_MMX mmx -PRED16x16_H INIT_MMX mmxext PRED16x16_H INIT_XMM ssse3 @@ -154,14 +136,6 @@ cglobal pred16x16_dc_8, 2,7 %endif SPLATB_REG m0, r2, m1 -%if mmsize==8 -mov r3d, 8 -.loop: -mova [r4+r1*0+0], m0 -mova [r4+r1*0+8], m0 -mova [r4+r1*1+0], m0 -mova [r4+r1*1+8], m0 -%else mov r3d, 4 .loop: mova [r4+r1*0], m0 @@ -169,15 +143,12 @@ cglobal pred16x16_dc_8, 2,7 lea r4, [r4+r1*2] mova [r4+r1*0], m0 mova [r4+r1*1], m0 -%endif lea r4, [r4+r1*2] dec r3d jg .loop 
REP_RET %endmacro -INIT_MMX mmxext -PRED16x16_DC INIT_XMM sse2 PRED16x16_DC INIT_XMM ssse3 @@ -187,47 +158,6 @@ PRED16x16_DC ; void ff_pred16x16_tm_vp8_8(uint8_t *src, ptrdiff_t stride) ;- -%macro PRED16x16_TM 0 -cglobal pred16x16_tm_vp8_8, 2,5 -subr0, r1 -pxor mm7, mm7 -movq mm0, [r0+0] -movq mm2, [r0+8] -movq mm1, mm0 -movq mm3, mm2 -punpcklbw mm0, mm7 -punpckhbw mm1, mm7 -punpcklbw mm2, mm7 -punpckhbw mm3, mm7 -movzx r3d, byte [r0-1] -mov r4d, 16 -.loop: -movzx r2d, byte [r0+r1-1] -sub r2d, r3d -movd mm4, r2d -SPLATWmm4, mm4, 0 -movq mm5, mm4 -movq mm6, mm4 -movq mm7, mm4 -paddw mm4, mm0 -paddw mm5, mm1 -paddw mm6, mm2 -paddw mm7, mm3 -packuswb mm4, mm5 -packuswb mm6, mm7 -movq [r0+r1+0], mm4 -movq [r0+r1+8], mm6 -addr0, r1 -dec r4d -jg .loop -REP_RET -%endmacro - -INIT_MMX mmx -PRED16x16_TM -INIT_MMX mmxext -PRED16x16_TM - INIT_XMM sse2 cglobal pred16x16_tm_vp8_8, 2,6,6 sub r0, r1 @@ -311,22 +241,6 @@ cglobal pred16x16_plane_%1_8, 2,9,7 neg r1 ; -stride movh m0, [r0+r1 -1] -%if mmsize == 8 -pxor m4, m4 -movh m1, [r0+r1 +3 ] -movh m2, [r0+r1 +8 ] -movh m3, [r0+r1 +12] -punpcklbwm0, m4 -punpcklbwm1, m4 -punpcklbwm2, m4 -punpcklbwm3, m4 -pmullw m0, [pw_m8tom1 ] -pmullw m1, [pw_m8tom1+8] -pmullw m2, [pw_1to8] -pmullw m3, [pw_1to8 +8] -paddwm0, m2 -paddwm1, m3 -%else ; mmsize == 16 %if cpuflag(ssse3) movhps m0, [r0+r1 +8] pmaddubswm0, [plane_shuf] ; H coefficients @@ -340,21 +254,10 @@ cglobal pred16x16_plane_%1_8, 2,9,7 paddwm0, m1 %endif movhlps m1, m0 -%endif paddwm0, m1 -%if cpuflag(mmxext) PSHUFLW m1, m0, 0xE -%elif cpuflag(mmx) -mova m1, m0 -psrlqm1, 32 -%endif paddwm0, m1 -%if cpuflag(mmxext) PSHUFLW m1, m0, 0x1 -%elif cpuflag(mmx) -mova m1, m0 -psrlqm1, 16 -%endif paddwm0, m1 ; sum of H coefficients lea r4, [r0+r2*8-1] @@ -496,24 +399,10 @@ cglobal pred16x16_plane_%1_8, 2,9,7 SWAP 0, 1 %endif mova m2, m0 -%if mmsize == 8 -mova m5, m0 -%endif pmullw m0, [pw_0to7]; 0*H, 1*H, ..., 7*H (
[FFmpeg-cvslog] avcodec/x86/rv40dsp_init: Remove obsolete MMX(EXT), 3dnow functions
ffmpeg | branch: master | Andreas Rheinhardt | Fri Jun 10 16:28:37 2022 +0200| [9abf906800155a3828e869f1820605872c2de6b4] | committer: Andreas Rheinhardt avcodec/x86/rv40dsp_init: Remove obsolete MMX(EXT), 3dnow functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truly ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9abf906800155a3828e869f1820605872c2de6b4 --- libavcodec/x86/h264_chromamc.asm | 2 -- libavcodec/x86/rv40dsp.asm | 20 - libavcodec/x86/rv40dsp_init.c| 47 3 files changed, 69 deletions(-) diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm index c7c4a2d4aa..ba6f4af3b0 100644 --- a/libavcodec/x86/h264_chromamc.asm +++ b/libavcodec/x86/h264_chromamc.asm @@ -448,9 +448,7 @@ chroma_mc2_mmx_func avg, h264 INIT_MMX 3dnow chroma_mc8_mmx_func avg, h264, _rnd -chroma_mc8_mmx_func avg, rv40 chroma_mc4_mmx_func avg, h264 -chroma_mc4_mmx_func avg, rv40 %macro chroma_mc8_ssse3_func 2-3 cglobal %1_%2_chroma_mc8%3, 6, 7, 8 diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm index bcad1aee80..f2ce236d44 100644 --- a/libavcodec/x86/rv40dsp.asm +++ b/libavcodec/x86/rv40dsp.asm @@ -230,20 +230,6 @@ cglobal %1_rv40_qpel_h, 6, 6+npicregs, 12, dst, dststride, src, srcstride, heigh REP_RET %endmacro -%if ARCH_X86_32 -INIT_MMX mmx -FILTER_V put -FILTER_H put - -INIT_MMX mmxext -FILTER_V avg -FILTER_H avg - -INIT_MMX 3dnow -FILTER_V avg -FILTER_H avg -%endif - INIT_XMM sse2 FILTER_H put FILTER_H avg @@ -481,12 +467,6 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8 REP_RET %endmacro -INIT_MMX mmxext -RV40_WEIGHT rnd,8, 3 -RV40_WEIGHT rnd, 16, 4 -RV40_WEIGHT nornd, 8, 3 -RV40_WEIGHT nornd, 16, 4 - INIT_XMM sse2 RV40_WEIGHT rnd,8, 3 
RV40_WEIGHT rnd, 16, 4 diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c index 7a05ab14ad..a04c5a5449 100644 --- a/libavcodec/x86/rv40dsp_init.c +++ b/libavcodec/x86/rv40dsp_init.c @@ -44,15 +44,11 @@ void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y); void ff_avg_rv40_chroma_mc8_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y); -void ff_avg_rv40_chroma_mc8_3dnow(uint8_t *dst, uint8_t *src, - ptrdiff_t stride, int h, int x, int y); void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y); void ff_avg_rv40_chroma_mc4_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y); -void ff_avg_rv40_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src, - ptrdiff_t stride, int h, int x, int y); #define DECLARE_WEIGHT(opt) \ void ff_rv40_weight_func_rnd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *src2, \ @@ -63,7 +59,6 @@ void ff_rv40_weight_func_nornd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *sr int w1, int w2, ptrdiff_t stride); \ void ff_rv40_weight_func_nornd_8_##opt (uint8_t *dst, uint8_t *src1, uint8_t *src2, \ int w1, int w2, ptrdiff_t stride); -DECLARE_WEIGHT(mmxext) DECLARE_WEIGHT(sse2) DECLARE_WEIGHT(ssse3) @@ -148,25 +143,6 @@ QPEL_MC_DECL(avg_, _ssse3) QPEL_MC_DECL(put_, _sse2) QPEL_MC_DECL(avg_, _sse2) -#if ARCH_X86_32 -#undef LOOPSIZE -#undef HCOFF -#undef VCOFF -#define LOOPSIZE 4 -#define HCOFF(x) (64 * ((x) - 1)) -#define VCOFF(x) (64 * ((x) - 1)) - -QPEL_MC_DECL(put_, _mmx) - -#define ff_put_rv40_qpel_h_mmxext ff_put_rv40_qpel_h_mmx -#define ff_put_rv40_qpel_v_mmxext ff_put_rv40_qpel_v_mmx -QPEL_MC_DECL(avg_, _mmxext) - -#define ff_put_rv40_qpel_h_3dnow ff_put_rv40_qpel_h_mmx -#define ff_put_rv40_qpel_v_3dnow ff_put_rv40_qpel_v_mmx -QPEL_MC_DECL(avg_, _3dnow) -#endif - /** @{ */ /** Set one function */ #define QPEL_FUNC_SET(OP, SIZE, PH, PV, OPT)\ @@ -207,9 +183,6 @@ DEFINE_FN(avg, 16, ssse3) #if 
HAVE_MMX_INLINE DEFINE_FN(put, 8, mmx) -DEFINE_FN(avg, 8, mmx) -DEFINE_FN(put, 16, mmx) -DEFINE_FN(avg, 16, mmx) #endif av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c) @@ -218,10 +191,7 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c) #if HAVE_MMX_INLINE if (INLINE_MMX(cpu_flags)) { -c->put_pixels_tab[0][15] = put_rv40_qpel16_mc33_mmx; c->put_pixels_tab[1][15] = put_rv40_qpel8_mc33_mmx; -c->avg_pixels_tab[0][15] = avg_rv40_qpel16_mc33_mmx; -c->avg_pi
[FFmpeg-cvslog] avfilter/x86/vf_noise: Remove obsolete MMX function
ffmpeg | branch: master | Andreas Rheinhardt | Tue Jun 7 22:26:33 2022 +0200| [c5dd2fdc090fab7e2d0edce4fb3b463028c5fafb] | committer: Andreas Rheinhardt avfilter/x86/vf_noise: Remove obsolete MMX function x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from line_noise_mmx are truly ancient 32bit x86s it is removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c5dd2fdc090fab7e2d0edce4fb3b463028c5fafb --- libavfilter/x86/vf_noise.c | 29 - 1 file changed, 29 deletions(-) diff --git a/libavfilter/x86/vf_noise.c b/libavfilter/x86/vf_noise.c index f7a4d00336..43387c9908 100644 --- a/libavfilter/x86/vf_noise.c +++ b/libavfilter/x86/vf_noise.c @@ -25,34 +25,6 @@ #include "libavfilter/vf_noise.h" #if HAVE_INLINE_ASM -static void line_noise_mmx(uint8_t *dst, const uint8_t *src, - const int8_t *noise, int len, int shift) -{ -x86_reg mmx_len= len & (~7); -noise += shift; - -__asm__ volatile( -"mov %3, %%"FF_REG_a"\n\t" -"pcmpeqb %%mm7, %%mm7\n\t" -"psllw $15, %%mm7\n\t" -"packsswb %%mm7, %%mm7 \n\t" -".p2align 4 \n\t" -"1: \n\t" -"movq (%0, %%"FF_REG_a"), %%mm0 \n\t" -"movq (%1, %%"FF_REG_a"), %%mm1 \n\t" -"pxor %%mm7, %%mm0 \n\t" -"paddsb %%mm1, %%mm0 \n\t" -"pxor %%mm7, %%mm0 \n\t" -"movq %%mm0, (%2, %%"FF_REG_a") \n\t" -"add $8, %%"FF_REG_a"\n\t" -" js 1b \n\t" -:: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len) -: "%"FF_REG_a -); -if (mmx_len != len) -ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0); -} - #if HAVE_6REGS static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src, int len, const int8_t * const *shift) @@ -132,7 +104,6 @@ av_cold void ff_noise_init_x86(NoiseContext *n) int cpu_flags = av_get_cpu_flags(); if (INLINE_MMX(cpu_flags)) { -n->line_noise = line_noise_mmx; #if 
HAVE_6REGS n->line_noise_avg = line_noise_avg_mmx; #endif ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/hevcdsp_init: Remove obsolete MMXEXT functions
ffmpeg | branch: master | Andreas Rheinhardt | Fri Jun 10 15:47:57 2022 +0200| [338f8fd2324385fdabba7de08de25885ade2edc1] | committer: Andreas Rheinhardt avcodec/x86/hevcdsp_init: Remove obsolete MMXEXT functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truly ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=338f8fd2324385fdabba7de08de25885ade2edc1 --- libavcodec/x86/hevc_idct.asm | 1 - libavcodec/x86/hevcdsp_init.c | 4 2 files changed, 5 deletions(-) diff --git a/libavcodec/x86/hevc_idct.asm b/libavcodec/x86/hevc_idct.asm index 1eb1973f27..ce41f33822 100644 --- a/libavcodec/x86/hevc_idct.asm +++ b/libavcodec/x86/hevc_idct.asm @@ -811,7 +811,6 @@ cglobal hevc_idct_32x32_%1, 1, 6, 16, 256, coeffs %macro INIT_IDCT_DC 1 INIT_MMX mmxext IDCT_DC_NL 4, %1 -IDCT_DC 8, 2, %1 INIT_XMM sse2 IDCT_DC_NL 8, %1 diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c index 48f48a925f..f7a5b28bec 100644 --- a/libavcodec/x86/hevcdsp_init.c +++ b/libavcodec/x86/hevcdsp_init.c @@ -64,7 +64,6 @@ void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \ void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs) IDCT_DC_FUNCS(4x4, mmxext); -IDCT_DC_FUNCS(8x8, mmxext); IDCT_DC_FUNCS(8x8, sse2); IDCT_DC_FUNCS(16x16, sse2); IDCT_DC_FUNCS(32x32, sse2); @@ -712,7 +711,6 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) if (bit_depth == 8) { if (EXTERNAL_MMXEXT(cpu_flags)) { c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext; -c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_mmxext; c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext; } @@ -889,7 +887,6 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) if (EXTERNAL_MMXEXT(cpu_flags)) { 
c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext; c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext; -c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_mmxext; } if (EXTERNAL_SSE2(cpu_flags)) { c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2; @@ -1105,7 +1102,6 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) } else if (bit_depth == 12) { if (EXTERNAL_MMXEXT(cpu_flags)) { c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext; -c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_mmxext; } if (EXTERNAL_SSE2(cpu_flags)) { c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/mpegvideoencdsp: Remove obsolete MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Sat Jun 11 04:09:24 2022 +0200| [55d8618a4772513125678c9098fa6ca77dfdb3c3] | committer: Andreas Rheinhardt avcodec/x86/mpegvideoencdsp: Remove obsolete MMX(EXT) functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truly ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=55d8618a4772513125678c9098fa6ca77dfdb3c3 --- libavcodec/x86/mpegvideoencdsp.asm| 49 ++- libavcodec/x86/mpegvideoencdsp_init.c | 14 -- 2 files changed, 2 insertions(+), 61 deletions(-) diff --git a/libavcodec/x86/mpegvideoencdsp.asm b/libavcodec/x86/mpegvideoencdsp.asm index aec73f82dc..e3d88f01c4 100644 --- a/libavcodec/x86/mpegvideoencdsp.asm +++ b/libavcodec/x86/mpegvideoencdsp.asm @@ -23,21 +23,15 @@ %include "libavutil/x86/x86util.asm" -SECTION_RODATA - -cextern pw_1 - SECTION .text -; int ff_pix_sum16_mmx(uint8_t *pix, int line_size) +; int ff_pix_sum16(uint8_t *pix, int line_size) ; %1 = number of loops ; %2 = number of GPRs used %macro PIX_SUM16 3 cglobal pix_sum16, 2, %2, 6 movsxdifnidn r1, r1d mov r2, %1 -%if mmsize == 16 lea r3, [r1*3] -%endif %if notcpuflag(xop) pxor m5, m5 %endif @@ -50,56 +44,27 @@ cglobal pix_sum16, 2, %2, 6 vphaddubqm3, [r0+r3] %else mova m0, [r0] -%if mmsize == 8 -mova m1, [r0+8] -%if cpuflag(mmxext) -mova m2, [r0+r1] -mova m3, [r0+r1+8] -%endif -%else ; sse2 mova m1, [r0+r1] mova m2, [r0+r1*2] mova m3, [r0+r3] -%endif -%if cpuflag(mmxext) psadbw m0, m5 psadbw m1, m5 psadbw m2, m5 psadbw m3, m5 -%else ; mmx -punpckhbwm2, m0, m5 -punpcklbwm0, m5 -punpckhbwm3, m1, m5 -punpcklbwm1, m5 -%endif ; cpuflag(mmxext) %endif ; cpuflag(xop) paddwm1, m0 paddwm3, m2 paddwm3, m1 paddwm4, m3 -%if cpuflag(mmxext) lea r0, [r0+r1*%3] -%else 
-add r0, r1 -%endif dec r2 jne .loop -%if mmsize == 16 pshufd m0, m4, q0032 padddm4, m0 -%elif notcpuflag(mmxext) -HADDWm4, m5 -%endif movdeax, m4 RET %endmacro -%if ARCH_X86_32 -INIT_MMX mmx -PIX_SUM16 16, 3, 0 -INIT_MMX mmxext -PIX_SUM16 8, 4, 2 -%endif INIT_XMM sse2 PIX_SUM16 4, 4, 4 %if HAVE_XOP_EXTERNAL @@ -107,7 +72,7 @@ INIT_XMM xop PIX_SUM16 4, 4, 4 %endif -; int ff_pix_norm1_mmx(uint8_t *pix, int line_size) +; int ff_pix_norm1(uint8_t *pix, int line_size) ; %1 = number of xmm registers used ; %2 = number of loops %macro PIX_NORM1 2 @@ -118,11 +83,7 @@ cglobal pix_norm1, 2, 3, %1 pxor m5, m5 .loop: mova m2, [r0+0] -%if mmsize == 8 -mova m3, [r0+8] -%else mova m3, [r0+r1] -%endif punpckhbwm1, m2, m0 punpcklbwm2, m0 punpckhbwm4, m3, m0 @@ -135,11 +96,7 @@ cglobal pix_norm1, 2, 3, %1 padddm4, m3 padddm5, m2 padddm5, m4 -%if mmsize == 8 -add r0, r1 -%else lea r0, [r0+r1*2] -%endif dec r2 jne .loop HADDDm5, m1 @@ -147,8 +104,6 @@ cglobal pix_norm1, 2, 3, %1 RET %endmacro -INIT_MMX mmx -PIX_NORM1 0, 16 INIT_XMM sse2 PIX_NORM1 6, 8 diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c index 532836cec9..b9c80b5382 100644 --- a/libavcodec/x86/mpegvideoencdsp_init.c +++ b/libavcodec/x86/mpegvideoencdsp_init.c @@ -23,11 +23,8 @@ #include "libavcodec/avcodec.h" #include "libavcodec/mpegvideoencdsp.h" -int ff_pix_sum16_mmx(uint8_t *pix, int line_size); -int ff_pix_sum16_mmxext(uint8_t *pix, int line_size); int ff_pix_sum16_sse2(uint8_t *pix, int line_size); int ff_pix_sum16_xop(uint8_t *pix, int line_size); -int ff_pix_norm1_mmx(uint8_t *pix, int line_size); int ff_pix_norm1_sse2(uint8_t *pix, int line_size); #if HAVE_INLINE_ASM @@ -219,17 +216,6 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, { int cpu_flags = av_get_cpu_flags(); -#if ARCH_X86_32 -if (EXTERNAL_MMX(cpu_flags)) { -c->pix_sum = ff_pix_sum16_mmx; -c->pix_norm1 = ff_pix_norm1_mmx; -} - -if (EXTERNAL_MMXEXT(cpu_flags)) { -c->pix_sum = 
ff_pix_sum16_mmxext; -} -#endif - if (EXTERNAL_SSE2(cpu_flags)) { c->pix_sum = ff_pix_sum16_sse2; c->pix_norm1 = ff_pix_norm1_sse2; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsub
[FFmpeg-cvslog] avcodec/x86/me_cmp: Remove obsolete MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Tue Jun 7 23:34:42 2022 +0200| [542765ce3eccbca587d54262a512cbdb1407230d] | committer: Andreas Rheinhardt avcodec/x86/me_cmp: Remove obsolete MMX(EXT) functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truly ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=542765ce3eccbca587d54262a512cbdb1407230d --- libavcodec/x86/me_cmp.asm| 9 +- libavcodec/x86/me_cmp_init.c | 349 +-- 2 files changed, 6 insertions(+), 352 deletions(-) diff --git a/libavcodec/x86/me_cmp.asm b/libavcodec/x86/me_cmp.asm index ad06d485ab..10809bbfb1 100644 --- a/libavcodec/x86/me_cmp.asm +++ b/libavcodec/x86/me_cmp.asm @@ -261,11 +261,10 @@ hadamard8_16_wrapper 0, 14 %endif %endmacro -INIT_MMX mmx -HADAMARD8_DIFF - +%if HAVE_ALIGNED_STACK == 0 INIT_MMX mmxext HADAMARD8_DIFF +%endif INIT_XMM sse2 %if ARCH_X86_64 @@ -385,10 +384,6 @@ cglobal sum_abs_dctelem, 1, 1, %1, block RET %endmacro -INIT_MMX mmx -SUM_ABS_DCTELEM 0, 4 -INIT_MMX mmxext -SUM_ABS_DCTELEM 0, 4 INIT_XMM sse2 SUM_ABS_DCTELEM 7, 2 INIT_XMM ssse3 diff --git a/libavcodec/x86/me_cmp_init.c b/libavcodec/x86/me_cmp_init.c index 9af911bb88..61e9396b8f 100644 --- a/libavcodec/x86/me_cmp_init.c +++ b/libavcodec/x86/me_cmp_init.c @@ -30,8 +30,6 @@ #include "libavcodec/me_cmp.h" #include "libavcodec/mpegvideo.h" -int ff_sum_abs_dctelem_mmx(int16_t *block); -int ff_sum_abs_dctelem_mmxext(int16_t *block); int ff_sum_abs_dctelem_sse2(int16_t *block); int ff_sum_abs_dctelem_ssse3(int16_t *block); int ff_sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, @@ -85,7 +83,6 @@ int ff_vsad16_approx_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int ff_hadamard8_diff16_ ## cpu(MpegEncContext *s, 
uint8_t *src1, \ uint8_t *src2, ptrdiff_t stride, int h); -hadamard_func(mmx) hadamard_func(mmxext) hadamard_func(sse2) hadamard_func(ssse3) @@ -126,232 +123,12 @@ static int nsse8_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2, #if HAVE_INLINE_ASM -static int vsad_intra16_mmx(MpegEncContext *v, uint8_t *pix, uint8_t *dummy, -ptrdiff_t stride, int h) -{ -int tmp; - -av_assert2(((uintptr_t) pix & 7) == 0); -av_assert2((stride & 7) == 0); - -#define SUM(in0, in1, out0, out1) \ -"movq (%0), %%mm2\n"\ -"movq 8(%0), %%mm3\n" \ -"add %2,%0\n" \ -"movq %%mm2, " #out0 "\n" \ -"movq %%mm3, " #out1 "\n" \ -"psubusb " #in0 ", %%mm2\n" \ -"psubusb " #in1 ", %%mm3\n" \ -"psubusb " #out0 ", " #in0 "\n" \ -"psubusb " #out1 ", " #in1 "\n" \ -"por %%mm2, " #in0 "\n" \ -"por %%mm3, " #in1 "\n" \ -"movq " #in0 ", %%mm2\n"\ -"movq " #in1 ", %%mm3\n"\ -"punpcklbw %%mm7, " #in0 "\n" \ -"punpcklbw %%mm7, " #in1 "\n" \ -"punpckhbw %%mm7, %%mm2\n" \ -"punpckhbw %%mm7, %%mm3\n" \ -"paddw " #in1 ", " #in0 "\n"\ -"paddw %%mm3, %%mm2\n" \ -"paddw %%mm2, " #in0 "\n" \ -"paddw " #in0 ", %%mm6\n" - - -__asm__ volatile ( -"movl%3, %%ecx\n" -"pxor %%mm6, %%mm6\n" -"pxor %%mm7, %%mm7\n" -"movq (%0), %%mm0\n" -"movq 8(%0), %%mm1\n" -"add %2, %0\n" -"jmp 2f\n" -"1:\n" - -SUM(%%mm4, %%mm5, %%mm0, %%mm1) -"2:\n" -SUM(%%mm0, %%mm1, %%mm4, %%mm5) - -"subl $2, %%ecx\n" -"jnz 1b\n" - -"movq %%mm6, %%mm0\n" -"psrlq $32, %%mm6\n" -"paddw %%mm6, %%mm0\n" -"movq %%mm0, %%mm6\n" -"psrlq $16, %%mm0\n" -"paddw %%mm6, %%mm0\n" -"movd %%mm0, %1\n" -: "+r" (pix), "=r" (tmp) -: "r" (stride), "m" (h) -: "%ecx"); - -return tmp & 0x; -} -#undef SUM - -static int vsad16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, - ptrdiff_t stride, int h) -{ -int tmp; - -av_assert2(((uintptr_t)pix1 & 7) == 0); -av_assert2(((uintptr_t)pix2 & 7) == 0); -av_assert2((stride & 7) == 0); - -#define SUM(in0, in1, out0, out1) \ -"movq (%0), %%mm2\n"\ -"movq (%1), " #out0 "\n"\ -"movq 8(%0), %%mm3\n" \ -"movq 8(%1), " #out1 "\n" \ 
-"add %3, %0\n"
[FFmpeg-cvslog] avcodec/x86/h264dsp_init: Remove obsolete MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Sat Jun 11 16:24:23 2022 +0200| [4618f36a2424a3a4d5760afabc2e9dd18d73f0a4] | committer: Andreas Rheinhardt avcodec/x86/h264dsp_init: Remove obsolete MMX(EXT) functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truly ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4618f36a2424a3a4d5760afabc2e9dd18d73f0a4 --- libavcodec/x86/h264_deblock.asm | 196 - libavcodec/x86/h264_deblock_10bit.asm | 42 +--- libavcodec/x86/h264_idct.asm | 382 -- libavcodec/x86/h264_weight.asm| 36 libavcodec/x86/h264dsp_init.c | 95 + 5 files changed, 9 insertions(+), 742 deletions(-) diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm index a2e745cd8e..479e6c3460 100644 --- a/libavcodec/x86/h264_deblock.asm +++ b/libavcodec/x86/h264_deblock.asm @@ -581,8 +581,6 @@ cglobal deblock_h_luma_8, 0,5,8,0x60+12 RET %endmacro ; DEBLOCK_LUMA -INIT_MMX mmxext -DEBLOCK_LUMA v8, 8 INIT_XMM sse2 DEBLOCK_LUMA v, 16 %if HAVE_AVX_EXTERNAL @@ -864,200 +862,6 @@ DEBLOCK_LUMA_INTRA v INIT_XMM avx DEBLOCK_LUMA_INTRA v %endif -%if ARCH_X86_64 == 0 -INIT_MMX mmxext -DEBLOCK_LUMA_INTRA v8 -%endif - -INIT_MMX mmxext - -%macro CHROMA_V_START 0 -decr2d ; alpha-1 -decr3d ; beta-1 -movt5, r0 -subt5, r1 -subt5, r1 -%endmacro - -%macro CHROMA_H_START 0 -decr2d -decr3d -subr0, 2 -leat6, [r1*3] -movt5, r0 -addr0, t6 -%endmacro - -%define t5 r5 -%define t6 r6 - -;- -; void ff_deblock_v_chroma(uint8_t *pix, int stride, int alpha, int beta, -; int8_t *tc0) -;- -cglobal deblock_v_chroma_8, 5,6 -CHROMA_V_START -movq m0, [t5] -movq m1, [t5+r1] -movq m2, [r0] -movq m3, [r0+r1] -call ff_chroma_inter_body_mmxext -movq [t5+r1], m1 -movq [r0], m2 -RET - -;- -; void 
ff_deblock_h_chroma(uint8_t *pix, int stride, int alpha, int beta, -; int8_t *tc0) -;- -cglobal deblock_h_chroma_8, 5,7 -%if ARCH_X86_64 -; This could use the red zone on 64 bit unix to avoid the stack pointer -; readjustment, but valgrind assumes the red zone is clobbered on -; function calls and returns. -sub rsp, 16 -%define buf0 [rsp] -%define buf1 [rsp+8] -%else -%define buf0 r0m -%define buf1 r2m -%endif -CHROMA_H_START -TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6) -movq buf0, m0 -movq buf1, m3 -LOAD_MASK r2d, r3d -movd m6, [r4] ; tc0 -punpcklbw m6, m6 -pand m7, m6 -DEBLOCK_P0_Q0 -movq m0, buf0 -movq m3, buf1 -TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6) -%if ARCH_X86_64 -add rsp, 16 -%endif -RET - -ALIGN 16 -ff_chroma_inter_body_mmxext: -LOAD_MASK r2d, r3d -movd m6, [r4] ; tc0 -punpcklbw m6, m6 -pand m7, m6 -DEBLOCK_P0_Q0 -ret - -%define t5 r4 -%define t6 r5 - -cglobal deblock_h_chroma422_8, 5, 6 -SUB rsp, (1+ARCH_X86_64*2)*mmsize -%if ARCH_X86_64 -%define buf0 [rsp+16] -%define buf1 [rsp+8] -%else -%define buf0 r0m -%define buf1 r2m -%endif - -movd m6, [r4] -punpcklbw m6, m6 -movq [rsp], m6 -CHROMA_H_START - -TRANSPOSE4x8B_LOAD PASS8ROWS(t5, r0, r1, t6) -movq buf0, m0 -movq buf1, m3 -LOAD_MASK r2d, r3d -movd m6, [rsp] -punpcklwd m6, m6 -pand m7, m6 -DEBLOCK_P0_Q0 -movq m0, buf0 -movq m3, buf1 -TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6) - -lea r0, [r0+r1*8] -lea t5, [t5+r1*8] - -TRANSPOSE4x8B_LOAD PASS8ROWS(t5, r0, r1, t6) -movq buf0, m0 -movq buf1, m3 -LOAD_MASK r2d, r3d -movd m6, [rsp+4] -punpcklwd m6, m6 -pand m7, m6 -DEBLOCK_P0_Q0 -movq m0, buf0 -movq m3, buf1 -TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6) -ADD rsp, (1+ARCH_X86_64*2)*mmsize -RET - -; in: %1=p0 %2=p1 %3=q1 -; out: p0 = (p0 + q1 + 2*p1 + 2) >> 2 -%macro CHROMA_INTRA_P0 3 -movqm4, %1 -pxorm4, %3 -pandm4, [pb_1] ; m4 = (p0^q1)&1 -pavgb %1, %3 -psubusb %1, m4 -pavgb %1, %2 ; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1)) -%endmacro - -;
[FFmpeg-cvslog] avcodec/x86/sbrdsp: Remove obsolete SSE function
ffmpeg | branch: master | Andreas Rheinhardt | Fri Jun 10 20:10:45 2022 +0200| [3d151bab323271f476b21d5d84a8cdd6dc20a760] | committer: Andreas Rheinhardt avcodec/x86/sbrdsp: Remove obsolete SSE function x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from ff_sbr_qmf_deint_bfly_sse are truly ancient 32bit x86s it is removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3d151bab323271f476b21d5d84a8cdd6dc20a760 --- libavcodec/x86/sbrdsp.asm| 16 +--- libavcodec/x86/sbrdsp_init.c | 2 -- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm index 62bbe512ec..87dcdc43ce 100644 --- a/libavcodec/x86/sbrdsp.asm +++ b/libavcodec/x86/sbrdsp.asm @@ -251,7 +251,7 @@ cglobal sbr_neg_odd_64, 1,2,4,z REP_RET ; void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float *src1) -%macro SBR_QMF_DEINT_BFLY 0 +INIT_XMM sse2 cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c mov cq, 64*4-2*mmsize leavrevq, [vq + 64*4] @@ -260,17 +260,10 @@ cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c mova m1, [src1q] mova m4, [src0q+cq+mmsize] mova m5, [src1q+mmsize] -%if cpuflag(sse2) pshufdm2, m0, q0123 pshufdm3, m1, q0123 pshufdm6, m4, q0123 pshufdm7, m5, q0123 -%else -shufpsm2, m0, m0, q0123 -shufpsm3, m1, m1, q0123 -shufpsm6, m4, m4, q0123 -shufpsm7, m5, m5, q0123 -%endif addps m5, m2 subps m0, m7 addps m1, m6 @@ -284,13 +277,6 @@ cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c sub cq, 2*mmsize jge.loop REP_RET -%endmacro - -INIT_XMM sse -SBR_QMF_DEINT_BFLY - -INIT_XMM sse2 -SBR_QMF_DEINT_BFLY INIT_XMM sse2 cglobal sbr_qmf_pre_shuffle, 1,4,6,z diff --git a/libavcodec/x86/sbrdsp_init.c b/libavcodec/x86/sbrdsp_init.c index 6911a1a515..999f681220 100644 --- 
a/libavcodec/x86/sbrdsp_init.c +++ b/libavcodec/x86/sbrdsp_init.c @@ -34,7 +34,6 @@ void ff_sbr_hf_gen_sse(float (*X_high)[2], const float (*X_low)[2], float bw, int start, int end); void ff_sbr_neg_odd_64_sse(float *z); void ff_sbr_qmf_post_shuffle_sse(float W[32][2], const float *z); -void ff_sbr_qmf_deint_bfly_sse(float *v, const float *src0, const float *src1); void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float *src1); void ff_sbr_qmf_pre_shuffle_sse2(float *z); @@ -67,7 +66,6 @@ av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s) s->hf_g_filt = ff_sbr_hf_g_filt_sse; s->hf_gen = ff_sbr_hf_gen_sse; s->qmf_post_shuffle = ff_sbr_qmf_post_shuffle_sse; -s->qmf_deint_bfly = ff_sbr_qmf_deint_bfly_sse; s->qmf_deint_neg= ff_sbr_qmf_deint_neg_sse; s->autocorrelate= ff_sbr_autocorrelate_sse; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/xvididct: Remove obsolete MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Fri Jun 10 20:47:39 2022 +0200| [b2437a45af58b0a9d726f1ee082e7d2809175b99] | committer: Andreas Rheinhardt avcodec/x86/xvididct: Remove obsolete MMX(EXT) functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truly ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b2437a45af58b0a9d726f1ee082e7d2809175b99 --- libavcodec/tests/x86/dct.c | 4 - libavcodec/x86/xvididct.asm| 411 - libavcodec/x86/xvididct.h | 8 - libavcodec/x86/xvididct_init.c | 43 - 4 files changed, 466 deletions(-) diff --git a/libavcodec/tests/x86/dct.c b/libavcodec/tests/x86/dct.c index 6e3d8f7c01..207a2bcb36 100644 --- a/libavcodec/tests/x86/dct.c +++ b/libavcodec/tests/x86/dct.c @@ -69,10 +69,6 @@ static const struct algo idct_tab_arch[] = { { "SIMPLE-MMX", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX }, #endif #if CONFIG_MPEG4_DECODER && HAVE_X86ASM -#if ARCH_X86_32 -{ "XVID-MMX",ff_xvid_idct_mmx,FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX,1 }, -{ "XVID-MMXEXT", ff_xvid_idct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT, 1 }, -#endif #if HAVE_SSE2_EXTERNAL { "XVID-SSE2", ff_xvid_idct_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 }, #endif diff --git a/libavcodec/x86/xvididct.asm b/libavcodec/x86/xvididct.asm index 0220885da6..4197551cdf 100644 --- a/libavcodec/x86/xvididct.asm +++ b/libavcodec/x86/xvididct.asm @@ -91,145 +91,6 @@ iTab4: dw 0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 0x4b42, 0xd746 dw 0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df dw 0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e -%if ARCH_X86_32 -; - -; -; The first stage iDCT 8x8 - inverse DCTs of rows -; -; - -; The 8-point inverse DCT direct 
algorithm -; - -; -; static const short w[32] = { -; FIX(cos_4_16), FIX(cos_2_16), FIX(cos_4_16), FIX(cos_6_16), -; FIX(cos_4_16), FIX(cos_6_16), -FIX(cos_4_16), -FIX(cos_2_16), -; FIX(cos_4_16), -FIX(cos_6_16), -FIX(cos_4_16), FIX(cos_2_16), -; FIX(cos_4_16), -FIX(cos_2_16), FIX(cos_4_16), -FIX(cos_6_16), -; FIX(cos_1_16), FIX(cos_3_16), FIX(cos_5_16), FIX(cos_7_16), -; FIX(cos_3_16), -FIX(cos_7_16), -FIX(cos_1_16), -FIX(cos_5_16), -; FIX(cos_5_16), -FIX(cos_1_16), FIX(cos_7_16), FIX(cos_3_16), -; FIX(cos_7_16), -FIX(cos_5_16), FIX(cos_3_16), -FIX(cos_1_16) }; -; -; #define DCT_8_INV_ROW(x, y) -; { -; int a0, a1, a2, a3, b0, b1, b2, b3; -; -; a0 = x[0] * w[0] + x[2] * w[1] + x[4] * w[2] + x[6] * w[3]; -; a1 = x[0] * w[4] + x[2] * w[5] + x[4] * w[6] + x[6] * w[7]; -; a2 = x[0] * w[8] + x[2] * w[9] + x[4] * w[10] + x[6] * w[11]; -; a3 = x[0] * w[12] + x[2] * w[13] + x[4] * w[14] + x[6] * w[15]; -; b0 = x[1] * w[16] + x[3] * w[17] + x[5] * w[18] + x[7] * w[19]; -; b1 = x[1] * w[20] + x[3] * w[21] + x[5] * w[22] + x[7] * w[23]; -; b2 = x[1] * w[24] + x[3] * w[25] + x[5] * w[26] + x[7] * w[27]; -; b3 = x[1] * w[28] + x[3] * w[29] + x[5] * w[30] + x[7] * w[31]; -; -; y[0] = SHIFT_ROUND(a0 + b0); -; y[1] = SHIFT_ROUND(a1 + b1); -; y[2] = SHIFT_ROUND(a2 + b2); -; y[3] = SHIFT_ROUND(a3 + b3); -; y[4] = SHIFT_ROUND(a3 - b3); -; y[5] = SHIFT_ROUND(a2 - b2); -; y[6] = SHIFT_ROUND(a1 - b1); -; y[7] = SHIFT_ROUND(a0 - b0); -; } -; -; - -; -; In this implementation the outputs of the iDCT-1D are multiplied -; for rows 0,4 - by cos_4_16, -; for rows 1,7 - by cos_1_16, -; for rows 2,6 - by cos_2_16, -; for rows 3,5 - by cos_3_16 -; and are shifted to the left for better accuracy. 
-; -; For the constants used, -; FIX(float_const) = (short) (float_const * (1 << 15) + 0.5) -; -; - - -; - -; Tables for mmx processors -; - - -; Table for rows 0,4 - constants are multiplied by cos_4_16 -tab_i_04_mmx: dw 16384, 16384, 16384, -16384 - dw 21407, 8867, 8867, -21407 ; w07 w05 w03 w01 - dw 16384, -16384, 16384, 16384 ; w14 w12 w10 w08 - dw -8867, 21407, -21407, -8867 ; w15 w13 w11 w09 - dw 22725, 12873, 192
[FFmpeg-cvslog] avcodec/x86/idctdsp: Remove obsolete MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Fri Jun 10 20:28:06 2022 +0200| [bfb28b5ce89f3e950214b67ea95b45e3355c2caf] | committer: Andreas Rheinhardt avcodec/x86/idctdsp: Remove obsolete MMX(EXT) functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bfb28b5ce89f3e950214b67ea95b45e3355c2caf --- libavcodec/tests/x86/dct.c | 3 -- libavcodec/x86/idctdsp.asm | 79 +++--- libavcodec/x86/idctdsp.h | 6 libavcodec/x86/idctdsp_init.c | 11 +++--- libavcodec/x86/simple_idct.asm | 22 ++-- 5 files changed, 10 insertions(+), 111 deletions(-) diff --git a/libavcodec/tests/x86/dct.c b/libavcodec/tests/x86/dct.c index 207a2bcb36..ef0662ae37 100644 --- a/libavcodec/tests/x86/dct.c +++ b/libavcodec/tests/x86/dct.c @@ -65,9 +65,6 @@ static const struct algo fdct_tab_arch[] = { }; static const struct algo idct_tab_arch[] = { -#if HAVE_MMX_EXTERNAL -{ "SIMPLE-MMX", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX }, -#endif #if CONFIG_MPEG4_DECODER && HAVE_X86ASM #if HAVE_SSE2_EXTERNAL { "XVID-SSE2", ff_xvid_idct_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 }, diff --git a/libavcodec/x86/idctdsp.asm b/libavcodec/x86/idctdsp.asm index 089425a9ab..1cfdb5419d 100644 --- a/libavcodec/x86/idctdsp.asm +++ b/libavcodec/x86/idctdsp.asm @@ -37,47 +37,24 @@ SECTION .text %macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1 mova m1, [blockq+mmsize*0+%1] mova m2, [blockq+mmsize*2+%1] -%if mmsize == 8 -mova m3, [blockq+mmsize*4+%1] -mova m4, [blockq+mmsize*6+%1] -%endif packsswb m1, [blockq+mmsize*1+%1] packsswb m2, [blockq+mmsize*3+%1] -%if mmsize == 8 -packsswb m3, [blockq+mmsize*5+%1] -packsswb m4, [blockq+mmsize*7+%1] -%endif paddbm1, m0 paddbm2, 
m0 -%if mmsize == 8 -paddbm3, m0 -paddbm4, m0 -movq [pixelsq+lsizeq*0], m1 -movq [pixelsq+lsizeq*1], m2 -movq [pixelsq+lsizeq*2], m3 -movq [pixelsq+lsize3q ], m4 -%else movq [pixelsq+lsizeq*0], m1 movhps [pixelsq+lsizeq*1], m1 movq [pixelsq+lsizeq*2], m2 movhps [pixelsq+lsize3q ], m2 -%endif %endmacro -%macro PUT_SIGNED_PIXELS_CLAMPED 1 -cglobal put_signed_pixels_clamped, 3, 4, %1, block, pixels, lsize, lsize3 +INIT_XMM sse2 +cglobal put_signed_pixels_clamped, 3, 4, 3, block, pixels, lsize, lsize3 mova m0, [pb_80] lea lsize3q, [lsizeq*3] PUT_SIGNED_PIXELS_CLAMPED_HALF 0 lea pixelsq, [pixelsq+lsizeq*4] PUT_SIGNED_PIXELS_CLAMPED_HALF 64 RET -%endmacro - -INIT_MMX mmx -PUT_SIGNED_PIXELS_CLAMPED 0 -INIT_XMM sse2 -PUT_SIGNED_PIXELS_CLAMPED 3 ;-- ; void ff_put_pixels_clamped(const int16_t *block, uint8_t *pixels, @@ -87,40 +64,21 @@ PUT_SIGNED_PIXELS_CLAMPED 3 %macro PUT_PIXELS_CLAMPED_HALF 1 mova m0, [blockq+mmsize*0+%1] mova m1, [blockq+mmsize*2+%1] -%if mmsize == 8 -mova m2, [blockq+mmsize*4+%1] -mova m3, [blockq+mmsize*6+%1] -%endif packuswb m0, [blockq+mmsize*1+%1] packuswb m1, [blockq+mmsize*3+%1] -%if mmsize == 8 -packuswb m2, [blockq+mmsize*5+%1] -packuswb m3, [blockq+mmsize*7+%1] -movq [pixelsq], m0 -movq[lsizeq+pixelsq], m1 -movq [2*lsizeq+pixelsq], m2 -movq [lsize3q+pixelsq], m3 -%else movq [pixelsq], m0 movhps [lsizeq+pixelsq], m0 movq [2*lsizeq+pixelsq], m1 movhps [lsize3q+pixelsq], m1 -%endif %endmacro -%macro PUT_PIXELS_CLAMPED 0 +INIT_XMM sse2 cglobal put_pixels_clamped, 3, 4, 2, block, pixels, lsize, lsize3 lea lsize3q, [lsizeq*3] PUT_PIXELS_CLAMPED_HALF 0 lea pixelsq, [pixelsq+lsizeq*4] PUT_PIXELS_CLAMPED_HALF 64 RET -%endmacro - -INIT_MMX mmx -PUT_PIXELS_CLAMPED -INIT_XMM sse2 -PUT_PIXELS_CLAMPED ;-- ; void ff_add_pixels_clamped(const int16_t *block, uint8_t *pixels, @@ -130,41 +88,18 @@ PUT_PIXELS_CLAMPED %macro ADD_PIXELS_CLAMPED 1 mova m0, [blockq+mmsize*0+%1] mova m1, [blockq+mmsize*1+%1] -%if mmsize == 8 -mova m5, [blockq+mmsize*2+%1] -mova m6, 
[blockq+mmsize*3+%1] -%endif movq m2, [pixelsq] movq m3, [pixelsq+lsizeq] -%if mmsize == 8 -mova m7, m2 -punpcklbw m2, m4 -punpckhbw m7, m4 -paddsw m0, m2 -paddsw m1, m7 -mova m7, m3 -punpcklbw m3, m4 -punpckhbw m7, m4 -paddsw m5, m3 -paddsw m6, m7 -%else
[FFmpeg-cvslog] avcodec/x86/blockdsp: Remove obsolete MMX functions
ffmpeg | branch: master | Andreas Rheinhardt | Fri Jun 10 20:49:50 2022 +0200| [ee551a21ddcbf81afe183d9489c534ee80f263a0] | committer: Andreas Rheinhardt avcodec/x86/blockdsp: Remove obsolete MMX functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truly ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ee551a21ddcbf81afe183d9489c534ee80f263a0 --- libavcodec/x86/blockdsp.asm| 6 -- libavcodec/x86/blockdsp_init.c | 7 --- 2 files changed, 13 deletions(-) diff --git a/libavcodec/x86/blockdsp.asm b/libavcodec/x86/blockdsp.asm index 9d203df8f5..e380308d4a 100644 --- a/libavcodec/x86/blockdsp.asm +++ b/libavcodec/x86/blockdsp.asm @@ -46,9 +46,6 @@ cglobal clear_block, 1, 1, %1, blocks RET %endmacro -INIT_MMX mmx -%define ZERO pxor -CLEAR_BLOCK 0, 4 INIT_XMM sse %define ZERO xorps CLEAR_BLOCK 1, 2 @@ -78,9 +75,6 @@ cglobal clear_blocks, 1, 2, %1, blocks, len RET %endmacro -INIT_MMX mmx -%define ZERO pxor -CLEAR_BLOCKS 0 INIT_XMM sse %define ZERO xorps CLEAR_BLOCKS 1 diff --git a/libavcodec/x86/blockdsp_init.c b/libavcodec/x86/blockdsp_init.c index d7f8a8e508..b0ff9376d9 100644 --- a/libavcodec/x86/blockdsp_init.c +++ b/libavcodec/x86/blockdsp_init.c @@ -24,10 +24,8 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/blockdsp.h" -void ff_clear_block_mmx(int16_t *block); void ff_clear_block_sse(int16_t *block); void ff_clear_block_avx(int16_t *block); -void ff_clear_blocks_mmx(int16_t *blocks); void ff_clear_blocks_sse(int16_t *blocks); void ff_clear_blocks_avx(int16_t *blocks); @@ -37,11 +35,6 @@ av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, #if HAVE_X86ASM int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_MMX(cpu_flags)) { -c->clear_block = ff_clear_block_mmx; 
-c->clear_blocks = ff_clear_blocks_mmx; -} - if (EXTERNAL_SSE(cpu_flags)) { c->clear_block = ff_clear_block_sse; c->clear_blocks = ff_clear_blocks_sse; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/pixblockdsp: Remove obsolete MMX functions
ffmpeg | branch: master | Andreas Rheinhardt | Fri Jun 10 20:51:41 2022 +0200| [92b58002776edd3a3df03c90e8a3ab24b8f987de] | committer: Andreas Rheinhardt avcodec/x86/pixblockdsp: Remove obsolete MMX functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=92b58002776edd3a3df03c90e8a3ab24b8f987de --- libavcodec/x86/pixblockdsp.asm| 51 +++ libavcodec/x86/pixblockdsp_init.c | 12 - 2 files changed, 3 insertions(+), 60 deletions(-) diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm index 440fe29bcc..5fdd2914eb 100644 --- a/libavcodec/x86/pixblockdsp.asm +++ b/libavcodec/x86/pixblockdsp.asm @@ -25,30 +25,6 @@ SECTION .text -INIT_MMX mmx -; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t stride) -cglobal get_pixels, 3,4 -add r0, 128 -mov r3, -128 -pxor m7, m7 -.loop: -mova m0, [r1] -mova m2, [r1+r2] -mova m1, m0 -mova m3, m2 -punpcklbwm0, m7 -punpckhbwm1, m7 -punpcklbwm2, m7 -punpckhbwm3, m7 -mova [r0+r3+ 0], m0 -mova [r0+r3+ 8], m1 -mova [r0+r3+16], m2 -mova [r0+r3+24], m3 -lea r1, [r1+r2*2] -add r3, 32 -js .loop -REP_RET - INIT_XMM sse2 cglobal get_pixels, 3, 4, 5 lea r3, [r2*3] @@ -80,9 +56,9 @@ cglobal get_pixels, 3, 4, 5 mova [r0+0x70], m3 RET -; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, -; ptrdiff_t stride); -%macro DIFF_PIXELS 0 +; void ff_diff_pixels(int16_t *block, const uint8_t *s1, const uint8_t *s2, +; ptrdiff_t stride); +INIT_XMM sse2 cglobal diff_pixels, 4,5,5 pxor m4, m4 add r0, 128 @@ -90,39 +66,18 @@ cglobal diff_pixels, 4,5,5 .loop: movq m0, [r1] movq m2, [r2] -%if mmsize == 8 -movq m1, m0 -movq m3, m2 -punpcklbwm0, m4 
-punpckhbwm1, m4 -punpcklbwm2, m4 -punpckhbwm3, m4 -%else movq m1, [r1+r3] movq m3, [r2+r3] punpcklbwm0, m4 punpcklbwm1, m4 punpcklbwm2, m4 punpcklbwm3, m4 -%endif psubwm0, m2 psubwm1, m3 mova [r0+r4+0], m0 mova [r0+r4+mmsize], m1 -%if mmsize == 8 -add r1, r3 -add r2, r3 -%else lea r1, [r1+r3*2] lea r2, [r2+r3*2] -%endif add r4, 2 * mmsize jne .loop RET -%endmacro - -INIT_MMX mmx -DIFF_PIXELS - -INIT_XMM sse2 -DIFF_PIXELS diff --git a/libavcodec/x86/pixblockdsp_init.c b/libavcodec/x86/pixblockdsp_init.c index 3a5eb6959c..51f2a0033a 100644 --- a/libavcodec/x86/pixblockdsp_init.c +++ b/libavcodec/x86/pixblockdsp_init.c @@ -23,10 +23,7 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/pixblockdsp.h" -void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t stride); void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t stride); -void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, -ptrdiff_t stride); void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); @@ -36,15 +33,6 @@ av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c, { int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_MMX(cpu_flags)) { -if (!high_bit_depth) { -c->get_pixels_unaligned = -c->get_pixels = ff_get_pixels_mmx; -} -c->diff_pixels_unaligned = -c->diff_pixels = ff_diff_pixels_mmx; -} - if (EXTERNAL_SSE2(cpu_flags)) { if (!high_bit_depth) { c->get_pixels_unaligned = ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/lossless_audiodsp: Remove obsolete MMXEXT function
ffmpeg | branch: master | Andreas Rheinhardt | Fri Jun 10 20:54:45 2022 +0200| [6feea076e98512d78c8d735509ab6b5e9a71ca1c] | committer: Andreas Rheinhardt avcodec/x86/lossless_audiodsp: Remove obsolete MMXEXT function x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from ff_scalarproduct_and_madd_int16_mmxext are truly ancient 32bit x86s it is removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6feea076e98512d78c8d735509ab6b5e9a71ca1c --- libavcodec/x86/lossless_audiodsp.asm| 12 +--- libavcodec/x86/lossless_audiodsp_init.c | 6 -- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/libavcodec/x86/lossless_audiodsp.asm b/libavcodec/x86/lossless_audiodsp.asm index 063d7b41af..ff18eb7081 100644 --- a/libavcodec/x86/lossless_audiodsp.asm +++ b/libavcodec/x86/lossless_audiodsp.asm @@ -22,18 +22,14 @@ SECTION .text -%macro SCALARPRODUCT 0 ; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3, ; int order, int mul) +INIT_XMM sse2 cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul shl orderq, 1 movdm7, mulm -%if mmsize == 16 pshuflw m7, m7, 0 punpcklqdq m7, m7 -%else -pshufw m7, m7, 0 -%endif pxorm6, m6 add v1q, orderq add v2q, orderq @@ -61,12 +57,6 @@ cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul HADDD m6, m0 movd eax, m6 RET -%endmacro - -INIT_MMX mmxext -SCALARPRODUCT -INIT_XMM sse2 -SCALARPRODUCT INIT_XMM sse4 ; int ff_scalarproduct_and_madd_int32(int16_t *v1, int32_t *v2, int16_t *v3, diff --git a/libavcodec/x86/lossless_audiodsp_init.c b/libavcodec/x86/lossless_audiodsp_init.c index f74c7e4361..462329db32 100644 --- a/libavcodec/x86/lossless_audiodsp_init.c +++ b/libavcodec/x86/lossless_audiodsp_init.c @@ -21,9 +21,6 @@ #include "libavutil/x86/cpu.h" #include 
"libavcodec/lossless_audiodsp.h" -int32_t ff_scalarproduct_and_madd_int16_mmxext(int16_t *v1, const int16_t *v2, - const int16_t *v3, - int order, int mul); int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); @@ -40,9 +37,6 @@ av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c) #if HAVE_X86ASM int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_MMXEXT(cpu_flags)) -c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext; - if (EXTERNAL_SSE2(cpu_flags)) c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/svq1enc: Remove obsolete MMXEXT function
ffmpeg | branch: master | Andreas Rheinhardt | Fri Jun 10 20:58:35 2022 +0200| [9426a2f8ff4607b7293e6140e56b8cc44e629dbd] | committer: Andreas Rheinhardt avcodec/x86/svq1enc: Remove obsolete MMXEXT function x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from ff_ssd_int8_vs_int16_mmx are truely ancient 32bit x86s it is removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9426a2f8ff4607b7293e6140e56b8cc44e629dbd --- libavcodec/x86/svq1enc.asm| 22 +- libavcodec/x86/svq1enc_init.c | 5 - 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/libavcodec/x86/svq1enc.asm b/libavcodec/x86/svq1enc.asm index a87632836d..123e86ba24 100644 --- a/libavcodec/x86/svq1enc.asm +++ b/libavcodec/x86/svq1enc.asm @@ -23,39 +23,19 @@ SECTION .text -%macro SSD_INT8_VS_INT16 0 +INIT_XMM sse2 cglobal ssd_int8_vs_int16, 3, 3, 3, pix1, pix2, size pxor m0, m0 .loop: sub sizeq, 8 movq m1, [pix1q + sizeq] mova m2, [pix2q + sizeq*2] -%if mmsize == 8 -movq m3, [pix2q + sizeq*2 + mmsize] -punpckhbw m4, m1 -punpcklbw m1, m1 -psraw m4, 8 -psraw m1, 8 -psubw m3, m4 -psubw m2, m1 -pmaddwd m3, m3 -pmaddwd m2, m2 -paddd m0, m3 -paddd m0, m2 -%else punpcklbw m1, m1 psraw m1, 8 psubw m2, m1 pmaddwd m2, m2 paddd m0, m2 -%endif jg .loop HADDD m0, m1 movd eax, m0 RET -%endmacro - -INIT_MMX mmx -SSD_INT8_VS_INT16 -INIT_XMM sse2 -SSD_INT8_VS_INT16 diff --git a/libavcodec/x86/svq1enc_init.c b/libavcodec/x86/svq1enc_init.c index 40b4b0e183..787a5245f3 100644 --- a/libavcodec/x86/svq1enc_init.c +++ b/libavcodec/x86/svq1enc_init.c @@ -24,8 +24,6 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/svq1enc.h" -int ff_ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, - intptr_t size); int ff_ssd_int8_vs_int16_sse2(const int8_t *pix1, const int16_t *pix2, 
intptr_t size); @@ -33,9 +31,6 @@ av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c) { int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_MMX(cpu_flags)) { -c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_mmx; -} if (EXTERNAL_SSE2(cpu_flags)) { c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_sse2; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/fmtconvert: Remove obsolete SSE functions
ffmpeg | branch: master | Andreas Rheinhardt | Fri Jun 10 21:00:55 2022 +0200| [4038b5b209cd8a0a3cb559d1073e9b22196a8ace] | committer: Andreas Rheinhardt avcodec/x86/fmtconvert: Remove obsolete SSE functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4038b5b209cd8a0a3cb559d1073e9b22196a8ace --- libavcodec/x86/fmtconvert.asm| 36 libavcodec/x86/fmtconvert_init.c | 7 --- 2 files changed, 43 deletions(-) diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm index 8f62a0a093..e70df4662d 100644 --- a/libavcodec/x86/fmtconvert.asm +++ b/libavcodec/x86/fmtconvert.asm @@ -44,35 +44,17 @@ cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, len add dstq, lenq neg lenq .loop: -%if cpuflag(sse2) cvtdq2ps m1, [srcq+lenq ] cvtdq2ps m2, [srcq+lenq+16] -%else -cvtpi2ps m1, [srcq+lenq ] -cvtpi2ps m3, [srcq+lenq+ 8] -cvtpi2ps m2, [srcq+lenq+16] -cvtpi2ps m4, [srcq+lenq+24] -movlhps m1, m3 -movlhps m2, m4 -%endif mulps m1, m0 mulps m2, m0 mova [dstq+lenq ], m1 mova [dstq+lenq+16], m2 add lenq, 32 jl .loop -%if notcpuflag(sse2) -;; cvtpi2ps switches to MMX even if the source is a memory location -;; possible an error in documentation since every tested CPU disagrees with -;; that. 
Use emms anyway since the vast majority of machines will use the -;; SSE2 variant -emms -%endif RET %endmacro -INIT_XMM sse -INT32_TO_FLOAT_FMUL_SCALAR 5 INIT_XMM sse2 INT32_TO_FLOAT_FMUL_SCALAR 3 @@ -89,17 +71,8 @@ cglobal int32_to_float_fmul_array8, 5, 5, 5, c, dst, src, mul, len .loop: movss m0, [mulq] SPLATDm0 -%if cpuflag(sse2) cvtdq2ps m1, [srcq+lenq ] cvtdq2ps m2, [srcq+lenq+16] -%else -cvtpi2ps m1, [srcq+lenq ] -cvtpi2ps m3, [srcq+lenq+ 8] -cvtpi2ps m2, [srcq+lenq+16] -cvtpi2ps m4, [srcq+lenq+24] -movlhps m1, m3 -movlhps m2, m4 -%endif mulps m1, m0 mulps m2, m0 mova [dstq+lenq ], m1 @@ -107,18 +80,9 @@ cglobal int32_to_float_fmul_array8, 5, 5, 5, c, dst, src, mul, len add mulq, 4 add lenq, 32 jl .loop -%if notcpuflag(sse2) -;; cvtpi2ps switches to MMX even if the source is a memory location -;; possible an error in documentation since every tested CPU disagrees with -;; that. Use emms anyway since the vast majority of machines will use the -;; SSE2 variant -emms -%endif RET %endmacro -INIT_XMM sse -INT32_TO_FLOAT_FMUL_ARRAY8 INIT_XMM sse2 INT32_TO_FLOAT_FMUL_ARRAY8 diff --git a/libavcodec/x86/fmtconvert_init.c b/libavcodec/x86/fmtconvert_init.c index df097054e4..58b396856e 100644 --- a/libavcodec/x86/fmtconvert_init.c +++ b/libavcodec/x86/fmtconvert_init.c @@ -29,10 +29,7 @@ #if HAVE_X86ASM -void ff_int32_to_float_fmul_scalar_sse (float *dst, const int32_t *src, float mul, int len); void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int32_t *src, float mul, int len); -void ff_int32_to_float_fmul_array8_sse (FmtConvertContext *c, float *dst, const int32_t *src, -const float *mul, int len); void ff_int32_to_float_fmul_array8_sse2(FmtConvertContext *c, float *dst, const int32_t *src, const float *mul, int len); @@ -43,10 +40,6 @@ av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx #if HAVE_X86ASM int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_SSE(cpu_flags)) { -c->int32_to_float_fmul_scalar = 
ff_int32_to_float_fmul_scalar_sse; -c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_sse; -} if (EXTERNAL_SSE2(cpu_flags)) { c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2; c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_sse2; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/hpeldsp_vp3: Remove obsolete 3dnow functions
ffmpeg | branch: master | Andreas Rheinhardt | Fri Jun 10 21:18:52 2022 +0200| [aa8a2019928ae6ff6ec4382020b26b891f64d4bd] | committer: Andreas Rheinhardt avcodec/x86/hpeldsp_vp3: Remove obsolete 3dnow functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=aa8a2019928ae6ff6ec4382020b26b891f64d4bd --- libavcodec/x86/hpeldsp_vp3.asm| 16 ++-- libavcodec/x86/hpeldsp_vp3_init.c | 13 - 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm index cba96d06cb..88ca8e8e0a 100644 --- a/libavcodec/x86/hpeldsp_vp3.asm +++ b/libavcodec/x86/hpeldsp_vp3.asm @@ -23,7 +23,7 @@ SECTION .text ; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro PUT_NO_RND_PIXELS8_X2_EXACT 0 +INIT_MMX mmxext cglobal put_no_rnd_pixels8_x2_exact, 4,5 lea r4, [r2*3] pcmpeqb m6, m6 @@ -61,16 +61,10 @@ cglobal put_no_rnd_pixels8_x2_exact, 4,5 sub r3d, 4 jg .loop REP_RET -%endmacro - -INIT_MMX mmxext -PUT_NO_RND_PIXELS8_X2_EXACT -INIT_MMX 3dnow -PUT_NO_RND_PIXELS8_X2_EXACT ; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro PUT_NO_RND_PIXELS8_Y2_EXACT 0 +INIT_MMX mmxext cglobal put_no_rnd_pixels8_y2_exact, 4,5 lea r4, [r2*3] mova m0, [r1] @@ -103,9 +97,3 @@ cglobal put_no_rnd_pixels8_y2_exact, 4,5 sub r3d, 4 jg .loop REP_RET -%endmacro - -INIT_MMX mmxext -PUT_NO_RND_PIXELS8_Y2_EXACT -INIT_MMX 3dnow -PUT_NO_RND_PIXELS8_Y2_EXACT diff --git a/libavcodec/x86/hpeldsp_vp3_init.c b/libavcodec/x86/hpeldsp_vp3_init.c index 5979f4123c..1dbd1ba6f9 100644 --- 
a/libavcodec/x86/hpeldsp_vp3_init.c +++ b/libavcodec/x86/hpeldsp_vp3_init.c @@ -28,25 +28,12 @@ void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_x2_exact_3dnow(uint8_t *block, - const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_y2_exact_3dnow(uint8_t *block, - const uint8_t *pixels, - ptrdiff_t line_size, int h); av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags) { -if (EXTERNAL_AMD3DNOW(cpu_flags)) { -if (flags & AV_CODEC_FLAG_BITEXACT) { -c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow; -c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow; -} -} - if (EXTERNAL_MMXEXT(cpu_flags)) { if (flags & AV_CODEC_FLAG_BITEXACT) { c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/h264chroma: Remove obsolete 3dnow functions
ffmpeg | branch: master | Andreas Rheinhardt | Sat Jun 11 00:16:31 2022 +0200| [67f0db7bc5c42cc861add0c660d438f6bdc4ccfe] | committer: Andreas Rheinhardt avcodec/x86/h264chroma: Remove obsolete 3dnow functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truly ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=67f0db7bc5c42cc861add0c660d438f6bdc4ccfe --- libavcodec/x86/h264_chromamc.asm | 4 libavcodec/x86/h264chroma_init.c | 9 - 2 files changed, 13 deletions(-) diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm index ba6f4af3b0..e562efd69d 100644 --- a/libavcodec/x86/h264_chromamc.asm +++ b/libavcodec/x86/h264_chromamc.asm @@ -446,10 +446,6 @@ chroma_mc4_mmx_func avg, h264 chroma_mc4_mmx_func avg, rv40 chroma_mc2_mmx_func avg, h264 -INIT_MMX 3dnow -chroma_mc8_mmx_func avg, h264, _rnd -chroma_mc4_mmx_func avg, h264 - %macro chroma_mc8_ssse3_func 2-3 cglobal %1_%2_chroma_mc8%3, 6, 7, 8 mov r6d, r5d diff --git a/libavcodec/x86/h264chroma_init.c b/libavcodec/x86/h264chroma_init.c index 36bf29df02..7c0f492178 100644 --- a/libavcodec/x86/h264chroma_init.c +++ b/libavcodec/x86/h264chroma_init.c @@ -28,15 +28,11 @@ void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y); void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y); -void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src, - ptrdiff_t stride, int h, int x, int y); void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y); void ff_avg_h264_chroma_mc4_mmxext (uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y); -void 
ff_avg_h264_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src, - ptrdiff_t stride, int h, int x, int y); void ff_put_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y); @@ -77,11 +73,6 @@ av_cold void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth) c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx; } -if (EXTERNAL_AMD3DNOW(cpu_flags) && !high_bit_depth) { -c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow; -c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow; -} - if (EXTERNAL_MMXEXT(cpu_flags) && !high_bit_depth) { c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext; c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/h264_qpel: Remove obsolete MMXEXT functions
ffmpeg | branch: master | Andreas Rheinhardt | Thu Jun 9 03:32:03 2022 +0200| [4011a76494a5ff6844312813bc753aae8e54c2f0] | committer: Andreas Rheinhardt avcodec/x86/h264_qpel: Remove obsolete MMXEXT functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4011a76494a5ff6844312813bc753aae8e54c2f0 --- libavcodec/x86/h264_qpel.c | 116 - libavcodec/x86/h264_qpel_10bit.asm | 2 +- libavcodec/x86/h264_qpel_8bit.asm | 7 --- 3 files changed, 39 insertions(+), 86 deletions(-) diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index dda50ded89..5aa12ff81f 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -236,7 +236,11 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uin #define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext #define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext -#define H264_MC(OPNAME, SIZE, MMX, ALIGN) \ +#define H264_MC_C_H(OPNAME, SIZE, MMX, ALIGN) \ +H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\ +H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\ + +#define H264_MC_C_V_H_HV(OPNAME, SIZE, MMX, ALIGN) \ H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\ H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\ H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\ @@ -372,13 +376,9 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, const uin ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, stride, SIZE, SIZE);\ }\ -#define H264_MC_4816(MMX)\ -H264_MC(put_, 4, MMX, 8)\ -H264_MC(put_, 8, MMX, 8)\ -H264_MC(put_, 16,MMX, 8)\ -H264_MC(avg_, 4, MMX, 8)\ -H264_MC(avg_, 8, MMX, 8)\ -H264_MC(avg_, 16,MMX, 
8)\ +#define H264_MC(QPEL, SIZE, MMX, ALIGN)\ +QPEL(put_, SIZE, MMX, ALIGN) \ +QPEL(avg_, SIZE, MMX, ALIGN) \ #define H264_MC_816(QPEL, XMM)\ QPEL(put_, 8, XMM, 16)\ @@ -397,7 +397,9 @@ QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3) QPEL_H264_HV_XMM(put_, PUT_OP, ssse3) QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3) -H264_MC_4816(mmxext) +H264_MC(H264_MC_C_V_H_HV, 4, mmxext, 8) +H264_MC(H264_MC_C_H, 8, mmxext, 8) +H264_MC(H264_MC_C_H, 16, mmxext, 8) H264_MC_816(H264_MC_V, sse2) H264_MC_816(H264_MC_HV, sse2) H264_MC_816(H264_MC_H, ssse3) @@ -409,13 +411,9 @@ H264_MC_816(H264_MC_HV, ssse3) void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \ (uint8_t *dst, const uint8_t *src, ptrdiff_t stride); -#define LUMA_MC_ALL(DEPTH, TYPE, OPT) \ +#define LUMA_MC_4(DEPTH, TYPE, OPT) \ LUMA_MC_OP(put, 4, DEPTH, TYPE, OPT) \ -LUMA_MC_OP(avg, 4, DEPTH, TYPE, OPT) \ -LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \ -LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \ -LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \ -LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT) +LUMA_MC_OP(avg, 4, DEPTH, TYPE, OPT) #define LUMA_MC_816(DEPTH, TYPE, OPT) \ LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \ @@ -423,22 +421,22 @@ void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \ LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \ LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT) -LUMA_MC_ALL(10, mc00, mmxext) -LUMA_MC_ALL(10, mc10, mmxext) -LUMA_MC_ALL(10, mc20, mmxext) -LUMA_MC_ALL(10, mc30, mmxext) -LUMA_MC_ALL(10, mc01, mmxext) -LUMA_MC_ALL(10, mc11, mmxext) -LUMA_MC_ALL(10, mc21, mmxext) -LUMA_MC_ALL(10, mc31, mmxext) -LUMA_MC_ALL(10, mc02, mmxext) -LUMA_MC_ALL(10, mc12, mmxext) -LUMA_MC_ALL(10, mc22, mmxext) -LUMA_MC_ALL(10, mc32, mmxext) -LUMA_MC_ALL(10, mc03, mmxext) -LUMA_MC_ALL(10, mc13, mmxext) -LUMA_MC_ALL(10, mc23, mmxext) -LUMA_MC_ALL(10, mc33, mmxext) +LUMA_MC_4(10, mc00, mmxext) +LUMA_MC_4(10, mc10, mmxext) +LUMA_MC_4(10, mc20, mmxext) +LUMA_MC_4(10, mc30, mmxext) +LUMA_MC_4(10, mc01, mmxext) +LUMA_MC_4(10, mc11, 
mmxext) +LUMA_MC_4(10, mc21, mmxext) +LUMA_MC_4(10, mc31, mmxext) +LUMA_MC_4(10, mc02, mmxext) +LUMA_MC_4(10, mc12, mmxext) +LUMA_MC_4(10, mc22, mmxext) +LUMA_MC_4(10, mc32, mmxext) +LUMA_MC_4(10, mc03, mmxext) +LUMA_MC_4(10, mc13, mmxext) +LUMA_MC_4(10, mc23, mmxext) +LUMA_MC_4(10, mc33, mmxext) LUMA_MC_816(10, mc00, sse2) LUMA_MC_816(10, mc10, sse2) @@ -463,50 +461,18 @@ LUMA_MC_816(10, mc13, sse2) LUMA_MC_816(10, mc23, sse2) LUMA_MC_816(10, mc33, sse2) -#define QPEL16_OPMC(OP, MC, MMX)\ -void ff_ ## OP ## _h264_qpel16_ ## MC ## _10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride){\ -ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst , src , stride);\ -ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\ -src += 8*stride;\ -dst += 8*stride;\ -ff_ ## OP ## _h264_qp
[FFmpeg-cvslog] swscale/x86/rgb2rgb: Remove obsolete MMX, 3dnow functions
ffmpeg | branch: master | Andreas Rheinhardt | Sat Jun 11 01:07:57 2022 +0200| [608319a311a31f7d85333a7b08286c00be38eab6] | committer: Andreas Rheinhardt swscale/x86/rgb2rgb: Remove obsolete MMX, 3dnow functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=608319a311a31f7d85333a7b08286c00be38eab6 --- libswscale/x86/rgb2rgb.c | 26 libswscale/x86/rgb2rgb_template.c | 123 +- 2 files changed, 15 insertions(+), 134 deletions(-) diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c index 0ab139aca4..b325e5dbd5 100644 --- a/libswscale/x86/rgb2rgb.c +++ b/libswscale/x86/rgb2rgb.c @@ -85,20 +85,11 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset); // Note: We have C, MMX, MMXEXT, 3DNOW versions, there is no 3DNOW + MMXEXT one. 
-#define COMPILE_TEMPLATE_MMXEXT 0 -#define COMPILE_TEMPLATE_AMD3DNOW 0 #define COMPILE_TEMPLATE_SSE2 0 #define COMPILE_TEMPLATE_AVX 0 -//MMX versions -#undef RENAME -#define RENAME(a) a ## _mmx -#include "rgb2rgb_template.c" - // MMXEXT versions #undef RENAME -#undef COMPILE_TEMPLATE_MMXEXT -#define COMPILE_TEMPLATE_MMXEXT 1 #define RENAME(a) a ## _mmxext #include "rgb2rgb_template.c" @@ -116,19 +107,6 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset); #define RENAME(a) a ## _avx #include "rgb2rgb_template.c" -//3DNOW versions -#undef RENAME -#undef COMPILE_TEMPLATE_MMXEXT -#undef COMPILE_TEMPLATE_SSE2 -#undef COMPILE_TEMPLATE_AVX -#undef COMPILE_TEMPLATE_AMD3DNOW -#define COMPILE_TEMPLATE_MMXEXT 0 -#define COMPILE_TEMPLATE_SSE2 0 -#define COMPILE_TEMPLATE_AVX 0 -#define COMPILE_TEMPLATE_AMD3DNOW 1 -#define RENAME(a) a ## _3dnow -#include "rgb2rgb_template.c" - /* RGB15->RGB16 original by Strepto/Astral ported to gcc & bugfixed : A'rpi @@ -165,10 +143,6 @@ av_cold void rgb2rgb_init_x86(void) int cpu_flags = av_get_cpu_flags(); #if HAVE_INLINE_ASM -if (INLINE_MMX(cpu_flags)) -rgb2rgb_init_mmx(); -if (INLINE_AMD3DNOW(cpu_flags)) -rgb2rgb_init_3dnow(); if (INLINE_MMXEXT(cpu_flags)) rgb2rgb_init_mmxext(); if (INLINE_SSE2(cpu_flags)) diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index ae2469e663..4aba25dd51 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -36,34 +36,14 @@ #undef SFENCE #undef PAVGB -#if COMPILE_TEMPLATE_AMD3DNOW -#define PREFETCH "prefetch" -#define PAVGB "pavgusb" -#elif COMPILE_TEMPLATE_MMXEXT #define PREFETCH "prefetchnta" #define PAVGB "pavgb" -#else -#define PREFETCH " # nop" -#endif - -#if COMPILE_TEMPLATE_AMD3DNOW -/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. 
*/ -#define EMMS "femms" -#else -#define EMMS "emms" -#endif - -#if COMPILE_TEMPLATE_MMXEXT #define MOVNTQ "movntq" #define SFENCE "sfence" -#else -#define MOVNTQ "movq" -#define SFENCE " # nop" -#endif -#if !COMPILE_TEMPLATE_SSE2 +#define EMMS "emms" -#if !COMPILE_TEMPLATE_AMD3DNOW +#if !COMPILE_TEMPLATE_SSE2 static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size) { @@ -1353,9 +1333,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t SFENCE" \n\t" :::"memory"); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ -#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride) { int x,y; @@ -1453,9 +1431,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid SFENCE" \n\t" :::"memory"); } -#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ -#if !COMPILE_TEMPLATE_AMD3DNOW /** * Height should be a multiple of 2 and width should be a multiple of 16. * (If this is a problem for anyone then tell me, and I will fix it.) @@ -1559,7 +1535,6 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t SFENCE" \n\t" :::"memory"); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ /** * Height should be a multiple of 2 and width should be a multiple of 2. @@ -1673,7 +1648,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "1: \n\t" PREFETCH" 64(%0, %%"FF_REG_d") \n\t" PREFETCH" 64(%1, %%"FF_REG_d") \n\t" -#if COMPIL
[FFmpeg-cvslog] swscale/x86/yuv2rgb: Remove obsolete MMX functions
ffmpeg | branch: master | Andreas Rheinhardt | Sat Jun 11 01:13:22 2022 +0200| [2831837182fe26f0a19a4d366f3f0553311f1291] | committer: Andreas Rheinhardt swscale/x86/yuv2rgb: Remove obsolete MMX functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2831837182fe26f0a19a4d366f3f0553311f1291 --- libswscale/x86/yuv2rgb.c | 15 +-- libswscale/x86/yuv2rgb_template.c | 5 +++-- libswscale/x86/yuv_2_rgb.asm | 5 +++-- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 47f45bd7c2..6754062245 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -44,23 +44,22 @@ //MMX versions #if HAVE_MMX #undef RENAME -#undef COMPILE_TEMPLATE_MMXEXT -#define COMPILE_TEMPLATE_MMXEXT 0 +#define COMPILE_TEMPLATE_MMX #define RENAME(a) a ## _mmx #include "yuv2rgb_template.c" +#undef COMPILE_TEMPLATE_MMX #endif /* HAVE_MMX */ // MMXEXT versions #undef RENAME -#undef COMPILE_TEMPLATE_MMXEXT -#define COMPILE_TEMPLATE_MMXEXT 1 +#define COMPILE_TEMPLATE_MMXEXT #define RENAME(a) a ## _mmxext #include "yuv2rgb_template.c" +#undef COMPILE_TEMPLATE_MMXEXT //SSSE3 versions #undef RENAME -#undef COMPILE_TEMPLATE_MMXEXT -#define COMPILE_TEMPLATE_MMXEXT 0 +#define COMPILE_TEMPLATE_SSSE3 #define RENAME(a) a ## _ssse3 #include "yuv2rgb_template.c" @@ -127,10 +126,6 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) break; } else return yuv420_bgr32_mmx; -case AV_PIX_FMT_RGB24: -return yuv420_rgb24_mmx; -case AV_PIX_FMT_BGR24: -return yuv420_bgr24_mmx; case AV_PIX_FMT_RGB565: return yuv420_rgb16_mmx; case AV_PIX_FMT_RGB555: diff --git a/libswscale/x86/yuv2rgb_template.c 
b/libswscale/x86/yuv2rgb_template.c index d506f75e15..596943bb73 100644 --- a/libswscale/x86/yuv2rgb_template.c +++ b/libswscale/x86/yuv2rgb_template.c @@ -47,7 +47,7 @@ extern void RENAME(ff_yuv_420_bgr24)(x86_reg index, uint8_t *image, const uint8_ const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index); -#if !COMPILE_TEMPLATE_MMXEXT +#ifndef COMPILE_TEMPLATE_MMXEXT extern void RENAME(ff_yuv_420_rgb15)(x86_reg index, uint8_t *image, const uint8_t *pu_index, const uint8_t *pv_index, const uint64_t *pointer_c_dither, const uint8_t *py_2index); @@ -165,6 +165,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], } #endif +#if !defined(COMPILE_TEMPLATE_MMX) static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, @@ -192,4 +193,4 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[], } return srcSliceH; } - +#endif diff --git a/libswscale/x86/yuv_2_rgb.asm b/libswscale/x86/yuv_2_rgb.asm index f968b3a0a2..c5fa3ee690 100644 --- a/libswscale/x86/yuv_2_rgb.asm +++ b/libswscale/x86/yuv_2_rgb.asm @@ -69,6 +69,9 @@ SECTION .text %ifidn %1, yuva %define parameters index, image, pu_index, pv_index, pointer_c_dither, py_2index, pa_2index %define GPR_num 7 +%else +%define parameters index, image, pu_index, pv_index, pointer_c_dither, py_2index +%define GPR_num 6 %endif %else %define parameters index, image, pu_index, pv_index, pointer_c_dither, py_2index @@ -356,8 +359,6 @@ REP_RET %endmacro INIT_MMX mmx -yuv2rgb_fn yuv, rgb, 24 -yuv2rgb_fn yuv, bgr, 24 yuv2rgb_fn yuv, rgb, 32 yuv2rgb_fn yuv, bgr, 32 yuv2rgb_fn yuva, rgb, 32 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/hpeldsp: Remove obsolete MMX/3dnow functions
ffmpeg | branch: master | Andreas Rheinhardt | Fri Jun 10 22:42:01 2022 +0200| [a51279bbdea0d6db920d71980262bccd0ce78226] | committer: Andreas Rheinhardt avcodec/x86/hpeldsp: Remove obsolete MMX/3dnow functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a51279bbdea0d6db920d71980262bccd0ce78226 --- libavcodec/x86/fpel.asm | 1 - libavcodec/x86/hpeldsp.asm| 72 ++-- libavcodec/x86/hpeldsp.h | 2 - libavcodec/x86/hpeldsp_init.c | 108 +++--- libavcodec/x86/rnd_template.c | 2 + 5 files changed, 22 insertions(+), 163 deletions(-) diff --git a/libavcodec/x86/fpel.asm b/libavcodec/x86/fpel.asm index d38a1b1035..ebe8e43750 100644 --- a/libavcodec/x86/fpel.asm +++ b/libavcodec/x86/fpel.asm @@ -91,7 +91,6 @@ cglobal %1_pixels%2, 4,5,4 INIT_MMX mmx OP_PIXELS put, 4 OP_PIXELS put, 8 -OP_PIXELS avg, 8 OP_PIXELS put, 16 OP_PIXELS avg, 16 diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm index ce5d7a4e28..b3a270a173 100644 --- a/libavcodec/x86/hpeldsp.asm +++ b/libavcodec/x86/hpeldsp.asm @@ -83,8 +83,6 @@ cglobal put_pixels8_x2, 4,5 INIT_MMX mmxext PUT_PIXELS8_X2 -INIT_MMX 3dnow -PUT_PIXELS8_X2 ; void ff_put_pixels16_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) @@ -127,15 +125,13 @@ cglobal put_pixels16_x2, 4,5 INIT_MMX mmxext PUT_PIXELS_16 -INIT_MMX 3dnow -PUT_PIXELS_16 ; The 8_X2 macro can easily be used here INIT_XMM sse2 PUT_PIXELS8_X2 ; void ff_put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro PUT_NO_RND_PIXELS8_X2 0 +INIT_MMX mmxext cglobal put_no_rnd_pixels8_x2, 4,5 mova m6, [pb_1] lea r4, [r2*2] @@ -167,12 +163,6 @@ cglobal 
put_no_rnd_pixels8_x2, 4,5 sub r3d, 4 jne .loop REP_RET -%endmacro - -INIT_MMX mmxext -PUT_NO_RND_PIXELS8_X2 -INIT_MMX 3dnow -PUT_NO_RND_PIXELS8_X2 ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) @@ -209,15 +199,13 @@ cglobal put_pixels8_y2, 4,5 INIT_MMX mmxext PUT_PIXELS8_Y2 -INIT_MMX 3dnow -PUT_PIXELS8_Y2 ; actually, put_pixels16_y2_sse2 INIT_XMM sse2 PUT_PIXELS8_Y2 ; void ff_put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro PUT_NO_RND_PIXELS8_Y2 0 +INIT_MMX mmxext cglobal put_no_rnd_pixels8_y2, 4,5 mova m6, [pb_1] lea r4, [r2+r2] @@ -245,42 +233,6 @@ cglobal put_no_rnd_pixels8_y2, 4,5 sub r3d, 4 jne .loop REP_RET -%endmacro - -INIT_MMX mmxext -PUT_NO_RND_PIXELS8_Y2 -INIT_MMX 3dnow -PUT_NO_RND_PIXELS8_Y2 - - -; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro AVG_PIXELS8 0 -cglobal avg_pixels8, 4,5 -lea r4, [r2*2] -.loop: -mova m0, [r0] -mova m1, [r0+r2] -PAVGBm0, [r1] -PAVGBm1, [r1+r2] -mova [r0], m0 -mova[r0+r2], m1 -add r1, r4 -add r0, r4 -mova m0, [r0] -mova m1, [r0+r2] -PAVGBm0, [r1] -PAVGBm1, [r1+r2] -add r1, r4 -mova [r0], m0 -mova[r0+r2], m1 -add r0, r4 -sub r3d, 4 -jne .loop -REP_RET -%endmacro - -INIT_MMX 3dnow -AVG_PIXELS8 ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) @@ -291,10 +243,6 @@ cglobal avg_pixels16_x2, 4,5,4 cglobal avg_pixels8_x2, 4,5 %endif lea r4, [r2*2] -%if notcpuflag(mmxext) -pcmpeqd m5, m5 -paddbm5, m5 -%endif .loop: movu m0, [r1] movu m2, [r1+r2] @@ -335,12 +283,8 @@ cglobal avg_pixels8_x2, 4,5 REP_RET %endmacro -INIT_MMX mmx -AVG_PIXELS8_X2 INIT_MMX mmxext AVG_PIXELS8_X2 -INIT_MMX 3dnow -AVG_PIXELS8_X2 ; actually avg_pixels16_x2 INIT_XMM sse2 AVG_PIXELS8_X2 @@ -384,8 +328,6 @@ cglobal avg_pixels8_y2, 4,5 INIT_MMX mmxext AVG_PIXELS8_Y2 -INIT_MMX 3dnow -AVG_PIXELS8_Y2 ; actually avg_pixels16_y2 INIT_XMM sse2 AVG_PIXELS8_Y2 @@ -394,7 +336,7 @@ 
AVG_PIXELS8_Y2 ; void ff_avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) ; Note this is not correctly rounded, and is therefore used for ; not-bitexact output -%macro AVG_APPROX_PIXELS8_XY2 0 +INIT_MMX mmxext cglobal avg_approx_pixels8_xy2, 4,5 mova m6, [pb_1] lea r4, [r2*2] @@ -429,12 +371,6 @@ cglobal avg_approx_pixels8_xy2, 4,5 sub r3d, 4 jne .loop REP_RET -%endmacro - -
[FFmpeg-cvslog] swscale/x86/swscale: Remove obsolete and harmful MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Thu Jun 9 16:57:34 2022 +0200| [a05f22eaf393177b94432431c145cbc5ba10390a] | committer: Andreas Rheinhardt swscale/x86/swscale: Remove obsolete and harmful MMX(EXT) functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT, SSE and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2). So given that the only systems that benefit from these functions are truly ancient 32bit x86s they are removed. Moreover, some of the removed code was buggy/not bitexact and led to failures involving the f32le and f32be versions of gray, gbrp and gbrap on x86-32 when SSE2 was not disabled. See e.g. https://fate.ffmpeg.org/report.cgi?time=20220609221253&slot=x86_32-debian-kfreebsd-gcc-4.4-cpuflags-mmx Notice that yuv2yuvX_mmx is not removed, because it is used by SSE3 and AVX2 as fallback in case of unaligned data and also for tail processing. I don't know why yuv2yuvX_mmxext isn't being used for this; an earlier version [1] of 554c2bc7086f49ef5a6a989ad6bc4bc11807eb6f used it, but the version that was eventually applied does not. 
[1]: https://ffmpeg.org/pipermail/ffmpeg-devel/2020-November/272124.html Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a05f22eaf393177b94432431c145cbc5ba10390a --- libswscale/x86/input.asm | 82 +- libswscale/x86/output.asm | 30 ++ libswscale/x86/scale.asm | 45 - libswscale/x86/swscale.c | 83 +-- libswscale/x86/swscale_template.c | 30 -- 5 files changed, 38 insertions(+), 232 deletions(-) diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm index fcdfe2fcd8..6de6733faa 100644 --- a/libswscale/x86/input.asm +++ b/libswscale/x86/input.asm @@ -133,23 +133,18 @@ SECTION .text ; %2 = rgb or bgr %macro RGB24_TO_Y_FN 2-3 cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table -%if mmsize == 8 -mova m5, [%2_Ycoeff_12x4] -mova m6, [%2_Ycoeff_3x56] -%define coeff1 m5 -%define coeff2 m6 -%elif ARCH_X86_64 +%if ARCH_X86_64 mova m8, [%2_Ycoeff_12x4] mova m9, [%2_Ycoeff_3x56] %define coeff1 m8 %define coeff2 m9 -%else ; x86-32 && mmsize == 16 +%else ; x86-32 %define coeff1 [%2_Ycoeff_12x4] %define coeff2 [%2_Ycoeff_3x56] -%endif ; x86-32/64 && mmsize == 8/16 -%if (ARCH_X86_64 || mmsize == 8) && %0 == 3 +%endif ; x86-32/64 +%if ARCH_X86_64 && %0 == 3 jmp mangle(private_prefix %+ _ %+ %3 %+ 24ToY %+ SUFFIX).body -%else ; (ARCH_X86_64 && %0 == 3) || mmsize == 8 +%else ; ARCH_X86_64 && %0 == 3 .body: %if cpuflag(ssse3) mova m7, [shuf_rgb_12x4] @@ -184,7 +179,6 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table movd m1, [srcq+2] ; (byte) { R0, B1, G1, R1 } movd m2, [srcq+6] ; (byte) { B2, G2, R2, B3 } movd m3, [srcq+8] ; (byte) { R2, B3, G3, R3 } -%if mmsize == 16 ; i.e. 
sse2 punpckldq m0, m2 ; (byte) { B0, G0, R0, B1, B2, G2, R2, B3 } punpckldq m1, m3 ; (byte) { R0, B1, G1, R1, R2, B3, G3, R3 } movd m2, [srcq+12] ; (byte) { B4, G4, R4, B5 } @@ -193,7 +187,6 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table movd m6, [srcq+20] ; (byte) { R6, B7, G7, R7 } punpckldq m2, m5 ; (byte) { B4, G4, R4, B5, B6, G6, R6, B7 } punpckldq m3, m6 ; (byte) { R4, B5, G5, R5, R6, B7, G7, R7 } -%endif ; mmsize == 16 punpcklbw m0, m7 ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 } punpcklbw m1, m7 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 } punpcklbw m2, m7 ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 } @@ -215,7 +208,7 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table addwq, mmsize jl .loop REP_RET -%endif ; (ARCH_X86_64 && %0 == 3) || mmsize == 8 +%endif ; ARCH_X86_64 && %0 == 3 %endmacro ; %1 = nr. of XMM registers @@ -275,12 +268,10 @@ cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table movd m1, [srcq+2] ; (byte) { R0, B1, G1, R1 } movd m4, [srcq+6] ; (byte) { B2, G2, R2, B3 } movd m5, [srcq+8] ; (byte) { R2, B3, G3, R3 } -%if mmsize == 16 punpckldq m0, m4 ; (byte) { B0, G0, R0, B1, B2, G2, R2, B3 } punpckldq m1, m5 ; (byte) { R0, B1, G1, R1, R2, B3, G3, R3 } movd m4, [srcq+12] ; (byte) { B4, G4, R4, B5 } movd m5, [srcq+14] ; (byte) { R4, B5, G5, R5 } -%endif ; mmsize == 16 punpcklbw m0, m7 ; (word) { B0, G0, R0, B1, B2, G2
[FFmpeg-cvslog] swscale/x86/swscale: Simplify macro
ffmpeg | branch: master | Andreas Rheinhardt | Sat Jun 11 18:50:44 2022 +0200| [81d347203166a37b605920873ca2b8f19473ff3f] | committer: Andreas Rheinhardt swscale/x86/swscale: Simplify macro This is possible now that it is no longer used by MMX. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=81d347203166a37b605920873ca2b8f19473ff3f --- libswscale/x86/swscale.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index 97bbc4f2d0..628f12137c 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -507,12 +507,12 @@ switch(c->dstBpc){ \ case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \ case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \ } -#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \ +#define ASSIGN_VSCALE_FUNC(vscalefn, opt) \ switch(c->dstBpc){ \ -case 16: if (!isBE(c->dstFormat))vscalefn = ff_yuv2plane1_16_ ## opt1; break; \ -case 10: if (!isBE(c->dstFormat) && !isSemiPlanarYUV(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \ -case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \ -case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \ +case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt; break; \ +case 10: if (!isBE(c->dstFormat) && !isSemiPlanarYUV(c->dstFormat)) vscalefn = ff_yuv2plane1_10_ ## opt; break; \ +case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_9_ ## opt; break; \ +case 8: vscalefn = ff_yuv2plane1_8_ ## opt; break; \ default: av_assert0(c->dstBpc>8); \ } #define case_rgb(x, X, opt) \ @@ -534,7 +534,7 @@ switch(c->dstBpc){ \ ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2); ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, , HAVE_ALIGNED_STACK || ARCH_X86_64); -ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1); 
+ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2); switch (c->srcFormat) { case AV_PIX_FMT_YA8: @@ -590,7 +590,7 @@ switch(c->dstBpc){ \ if (EXTERNAL_AVX(cpu_flags)) { ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, , HAVE_ALIGNED_STACK || ARCH_X86_64); -ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); +ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx); switch (c->srcFormat) { case AV_PIX_FMT_YUYV422: ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avfilter/x86/vf_eq_init: Remove obsolete MMXEXT function
ffmpeg | branch: master | Andreas Rheinhardt | Sat Jun 11 01:37:50 2022 +0200| [77b2a422a09d1d801bebc3614f685fec0812963e] | committer: Andreas Rheinhardt avfilter/x86/vf_eq_init: Remove obsolete MMXEXT function x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from process_mmxext are truely ancient 32bit x86s it is removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=77b2a422a09d1d801bebc3614f685fec0812963e --- libavfilter/x86/vf_eq.asm| 12 ++-- libavfilter/x86/vf_eq_init.c | 20 2 files changed, 2 insertions(+), 30 deletions(-) diff --git a/libavfilter/x86/vf_eq.asm b/libavfilter/x86/vf_eq.asm index a30a287029..5118ffcba9 100644 --- a/libavfilter/x86/vf_eq.asm +++ b/libavfilter/x86/vf_eq.asm @@ -24,7 +24,7 @@ SECTION .text -%macro PROCESS_ONE_LINE 1 +INIT_XMM sse2 cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w movd m3, contrastd movd m4, brightnessd @@ -39,7 +39,7 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w pxor m1, m1 mov scalard, wd and scalard, mmsize-1 -sar wd, %1 +sar wd, 4 cmp wd, 1 jl .loop1 @@ -80,11 +80,3 @@ cglobal process_one_line, 5, 7, 5, src, dst, contrast, brightness, w .end: RET - -%endmacro - -INIT_MMX mmxext -PROCESS_ONE_LINE 3 - -INIT_XMM sse2 -PROCESS_ONE_LINE 4 diff --git a/libavfilter/x86/vf_eq_init.c b/libavfilter/x86/vf_eq_init.c index 113056e76b..a1719672df 100644 --- a/libavfilter/x86/vf_eq_init.c +++ b/libavfilter/x86/vf_eq_init.c @@ -25,27 +25,10 @@ #include "libavutil/x86/asm.h" #include "libavfilter/vf_eq.h" -extern void ff_process_one_line_mmxext(const uint8_t *src, uint8_t *dst, short contrast, - short brightness, int w); extern void ff_process_one_line_sse2(const uint8_t *src, uint8_t *dst, short contrast, short brightness, int w); #if HAVE_X86ASM 
-static void process_mmxext(EQParameters *param, uint8_t *dst, int dst_stride, - const uint8_t *src, int src_stride, int w, int h) -{ -short contrast = (short) (param->contrast * 256 * 16); -short brightness = ((short) (100.0 * param->brightness + 100.0) * 511) - / 200 - 128 - contrast / 32; - -while (h--) { -ff_process_one_line_mmxext(src, dst, contrast, brightness, w); -src += src_stride; -dst += dst_stride; -} -emms_c(); -} - static void process_sse2(EQParameters *param, uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int w, int h) { @@ -65,9 +48,6 @@ av_cold void ff_eq_init_x86(EQContext *eq) { #if HAVE_X86ASM int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_MMXEXT(cpu_flags)) { -eq->process = process_mmxext; -} if (EXTERNAL_SSE2(cpu_flags)) { eq->process = process_sse2; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avutil/x86/pixelutils: Remove obsolete MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Thu Jun 9 17:50:53 2022 +0200| [ea043cc53ed3506775ec6239ed5f8a20718b1098] | committer: Andreas Rheinhardt avutil/x86/pixelutils: Remove obsolete MMX(EXT) functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT, SSE and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2). So given that the only systems which benefit from the 8x8 MMX (overridden by MMXEXT) or the 16x16 MMXEXT (overridden by SSE2) are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ea043cc53ed3506775ec6239ed5f8a20718b1098 --- libavutil/x86/pixelutils.asm| 58 - libavutil/x86/pixelutils_init.c | 9 --- 2 files changed, 67 deletions(-) diff --git a/libavutil/x86/pixelutils.asm b/libavutil/x86/pixelutils.asm index 8b45ead78b..fbe9b45971 100644 --- a/libavutil/x86/pixelutils.asm +++ b/libavutil/x86/pixelutils.asm @@ -25,44 +25,6 @@ SECTION .text -;--- -; int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1, -; const uint8_t *src2, ptrdiff_t stride2); -;--- -INIT_MMX mmx -cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2 -pxorm7, m7 -pxorm6, m6 -%rep 4 -movam0, [src1q] -movam2, [src1q + stride1q] -movam1, [src2q] -movam3, [src2q + stride2q] -psubusb m4, m0, m1 -psubusb m5, m2, m3 -psubusb m1, m0 -psubusb m3, m2 -por m1, m4 -por m3, m5 -punpcklbw m0, m1, m7 -punpcklbw m2, m3, m7 -punpckhbw m1, m7 -punpckhbw m3, m7 -paddw m0, m1 -paddw m2, m3 -paddw m0, m2 -paddw m6, m0 -lea src1q, [src1q + 2*stride1q] -lea src2q, [src2q + 2*stride2q] -%endrep -psrlq m0, m6, 32 -paddw m6, m0 -psrlq m0, m6, 16 -paddw m6, m0 -movdeax, m6 -movzx eax, ax -RET - ;--- ; int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1, ; const uint8_t *src2, ptrdiff_t stride2); @@ -83,26 +45,6 @@ cglobal pixelutils_sad_8x8, 4,4,0, src1, stride1, src2, stride2 movdeax, m2 RET -;--- 
-; int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1, -;const uint8_t *src2, ptrdiff_t stride2); -;--- -INIT_MMX mmxext -cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2 -pxorm2, m2 -%rep 16 -movam0, [src1q] -movam1, [src1q + 8] -psadbw m0, [src2q] -psadbw m1, [src2q + 8] -paddw m2, m0 -paddw m2, m1 -add src1q, stride1q -add src2q, stride2q -%endrep -movdeax, m2 -RET - ;--- ; int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, ; const uint8_t *src2, ptrdiff_t stride2); diff --git a/libavutil/x86/pixelutils_init.c b/libavutil/x86/pixelutils_init.c index 184a3a4a9f..c3c0662414 100644 --- a/libavutil/x86/pixelutils_init.c +++ b/libavutil/x86/pixelutils_init.c @@ -21,13 +21,9 @@ #include "pixelutils.h" #include "cpu.h" -int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1, - const uint8_t *src2, ptrdiff_t stride2); int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1, const uint8_t *src2, ptrdiff_t stride2); -int ff_pixelutils_sad_16x16_mmxext(const uint8_t *src1, ptrdiff_t stride1, - const uint8_t *src2, ptrdiff_t stride2); int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, const uint8_t *src2, ptrdiff_t stride2); int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, @@ -53,10 +49,6 @@ void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned) { int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_MMX(cpu_flags)) { -sad[2] = ff_pixelutils_sad_8x8_mmx; -} - // The best way to use SSE2 would be to do 2 SADs in parallel, // but we'd have to modify the pixelutils API to return SIMD functions. @@ -65,7 +57,6 @@ void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, in
[FFmpeg-cvslog] avcodec/x86/fft: Remove obsolete 3dnow functions
ffmpeg | branch: master | Andreas Rheinhardt | Sat Jun 11 02:45:08 2022 +0200| [ec735579814b6b73e17da601ff011a918c49e40f] | committer: Andreas Rheinhardt avcodec/x86/fft: Remove obsolete 3dnow functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT, SSE and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2). So given that the only systems which benefit from the 3dnow implementations are truely ancient 32bit AMD x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ec735579814b6b73e17da601ff011a918c49e40f --- libavcodec/x86/fft.asm| 259 ++ libavcodec/x86/fft.h | 6 -- libavcodec/x86/fft_init.c | 14 --- 3 files changed, 6 insertions(+), 273 deletions(-) diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm index a671e8f48e..a44596e565 100644 --- a/libavcodec/x86/fft.asm +++ b/libavcodec/x86/fft.asm @@ -1,5 +1,5 @@ ;** -;* FFT transform with SSE/3DNow optimizations +;* FFT transform with SSE/AVX optimizations ;* Copyright (c) 2008 Loren Merritt ;* Copyright (c) 2011 Vitor Sessak ;* @@ -92,29 +92,6 @@ cextern cos_ %+ i SECTION .text -%macro T2_3DNOW 4 ; z0, z1, mem0, mem1 -mova %1, %3 -mova %2, %1 -pfadd%1, %4 -pfsub%2, %4 -%endmacro - -%macro T4_3DNOW 6 ; z0, z1, z2, z3, tmp0, tmp1 -mova %5, %3 -pfsub%3, %4 -pfadd%5, %4 ; {t6,t5} -pxor %3, [ps_m1p1] ; {t8,t7} -mova %6, %1 -movd [r0+12], %3 -punpckhdq %3, [r0+8] -pfadd%1, %5 ; {r0,i0} -pfsub%6, %5 ; {r2,i2} -mova %4, %2 -pfadd%2, %3 ; {r1,i1} -pfsub%4, %3 ; {r3,i3} -SWAP %3, %6 -%endmacro - ; in: %1 = {r0,i0,r2,i2,r4,i4,r6,i6} ; %2 = {r1,i1,r3,i3,r5,i5,r7,i7} ; %3, %4, %5 tmp @@ -199,7 +176,7 @@ SECTION .text vextractf128 %4 %+ H(%5), %3, 0 vextractf128 %4(%5 + 1), %2, 1 vextractf128 %4 %+ H(%5 + 1), %3, 1 -%elif cpuflag(sse) || cpuflag(3dnow) +%elif cpuflag(sse) mova %3, %2 unpcklps %2, %1 unpckhps %3, %1 @@ -310,12 +287,6 @@ IF%1 mova Z(1), m5 %endif %endmacro 
-%macro PUNPCK 3 -mova %3, %1 -punpckldq %1, %2 -punpckhdq %3, %2 -%endmacro - %define Z(x) [r0+mmsize*x] %define Z2(x) [r0+mmsize*x] %define ZH(x) [r0+mmsize*x+mmsize/2] @@ -462,68 +433,6 @@ fft16_sse: ret -%macro FFT48_3DNOW 0 -align 16 -fft4 %+ SUFFIX: -T2_3DNOW m0, m1, Z(0), Z(1) -mova m2, Z(2) -mova m3, Z(3) -T4_3DNOW m0, m1, m2, m3, m4, m5 -PUNPCK m0, m1, m4 -PUNPCK m2, m3, m5 -mova Z(0), m0 -mova Z(1), m4 -mova Z(2), m2 -mova Z(3), m5 -ret - -align 16 -fft8 %+ SUFFIX: -T2_3DNOW m0, m1, Z(0), Z(1) -mova m2, Z(2) -mova m3, Z(3) -T4_3DNOW m0, m1, m2, m3, m4, m5 -mova Z(0), m0 -mova Z(2), m2 -T2_3DNOW m4, m5, Z(4), Z(5) -T2_3DNOW m6, m7, Z2(6), Z2(7) -PSWAPD m0, m5 -PSWAPD m2, m7 -pxor m0, [ps_m1p1] -pxor m2, [ps_m1p1] -pfsubm5, m0 -pfaddm7, m2 -pfmulm5, [ps_root2] -pfmulm7, [ps_root2] -T4_3DNOW m1, m3, m5, m7, m0, m2 -mova Z(5), m5 -mova Z2(7), m7 -mova m0, Z(0) -mova m2, Z(2) -T4_3DNOW m0, m2, m4, m6, m5, m7 -PUNPCK m0, m1, m5 -PUNPCK m2, m3, m7 -mova Z(0), m0 -mova Z(1), m5 -mova Z(2), m2 -mova Z(3), m7 -PUNPCK m4, Z(5), m5 -PUNPCK m6, Z2(7), m7 -mova Z(4), m4 -mova Z(5), m5 -mova Z2(6), m6 -mova Z2(7), m7 -ret -%endmacro - -%if ARCH_X86_32 -INIT_MMX 3dnowext -FFT48_3DNOW - -INIT_MMX 3dnow -FFT48_3DNOW -%endif - %define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)] %define Z2(x) [zcq + o3q + mmsize*(x&1)] %define ZH(x) [zcq + o1q*(x&6) + mmsize*(x&1) + mmsize/2] @@ -575,7 +484,7 @@ INIT_XMM sse DECL_PASS pass_sse, PASS_BIG 1 DECL_PASS pass_interleave_sse, PASS_BIG 0 -%macro FFT_CALC_FUNC 0 +INIT_XMM sse cglobal fft_calc, 2,5,8 mov r3d, [r0 + FFTContext.nbits] PUSHr1 @@ -592,36 +501,16 @@ cglobal fft_calc, 2,5,8 shl r2, cl sub r4, r2 .loop: -%if mmsize == 8 -PSWAPD m0, [r4 + r2 + 4] -mova [r4 + r2 + 4], m0 -%else movaps xmm0, [r4 + r2] movaps xmm1, xmm0 unpcklps xmm0, [r4 + r2 + 16] unpckhps xmm1, [r4 + r2 + 16] movaps [r4 + r2], xmm0 movaps [r4 + r2 + 16], xmm1 -%endif add r2, mmsize*2 jl .loop .end: -%if cpuflag(3dnow) -femms -RET -%else REP_RET -%endif -%endmacro - 
-%if ARCH_X86_32 -INIT_MMX 3dnow -FFT_CALC_FUNC -INIT_MMX 3dnowext -FFT_CALC_FUNC -%endif -INIT_XMM sse -FFT_CALC_FUNC cglobal fft_permute, 2,7,1 mov r4, [r0 + FFTContext.revtab] @@ -656,7 +545,7 @@ cglobal fft_
[FFmpeg-cvslog] avcodec/x86/vorbisdsp: Remove obsolete 3dnow functions
ffmpeg | branch: master | Andreas Rheinhardt | Sat Jun 11 03:31:25 2022 +0200| [f76477d4d78b01ae0170d3e7d8ef77ac3105a80c] | committer: Andreas Rheinhardt avcodec/x86/vorbisdsp: Remove obsolete 3dnow functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT, SSE and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2). So given that the only systems which benefit from the 3dnow implementations are truely ancient 32bit AMD x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f76477d4d78b01ae0170d3e7d8ef77ac3105a80c --- libavcodec/x86/vorbisdsp.asm| 29 - libavcodec/x86/vorbisdsp_init.c | 6 -- 2 files changed, 35 deletions(-) diff --git a/libavcodec/x86/vorbisdsp.asm b/libavcodec/x86/vorbisdsp.asm index d952296716..9afe2eb352 100644 --- a/libavcodec/x86/vorbisdsp.asm +++ b/libavcodec/x86/vorbisdsp.asm @@ -27,35 +27,6 @@ pdw_8000: times 4 dd 0x8000 SECTION .text -%if ARCH_X86_32 -INIT_MMX 3dnow -cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size -pxor m7, m7 -leamagq, [magq+block_sizeq*4] -leaangq, [angq+block_sizeq*4] -neg block_sizeq -.loop: -mova m0, [magq+block_sizeq*4] -mova m1, [angq+block_sizeq*4] -mova m2, m0 -mova m3, m1 -pfcmpge m2, m7 ; m <= 0.0 -pfcmpge m3, m7 ; a <= 0.0 -pslldm2, 31 ; keep only the sign bit -pxor m1, m2 -mova m4, m3 -pand m3, m1 -pandnm4, m1 -pfaddm3, m0 ; a = m + ((a < 0) & (a ^ sign(m))) -pfsubm0, m4 ; m = m + ((a > 0) & (a ^ sign(m))) -mova [angq+block_sizeq*4], m3 -mova [magq+block_sizeq*4], m0 -add block_sizeq, 2 -jl .loop -femms -RET -%endif - INIT_XMM sse cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size mova m5, [pdw_8000] diff --git a/libavcodec/x86/vorbisdsp_init.c b/libavcodec/x86/vorbisdsp_init.c index bc1cc43a18..da9f9e685e 100644 --- a/libavcodec/x86/vorbisdsp_init.c +++ b/libavcodec/x86/vorbisdsp_init.c @@ -24,8 +24,6 @@ #include "libavutil/x86/cpu.h" 
#include "libavcodec/vorbisdsp.h" -void ff_vorbis_inverse_coupling_3dnow(float *mag, float *ang, - intptr_t blocksize); void ff_vorbis_inverse_coupling_sse(float *mag, float *ang, intptr_t blocksize); @@ -33,10 +31,6 @@ av_cold void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp) { int cpu_flags = av_get_cpu_flags(); -#if ARCH_X86_32 -if (EXTERNAL_AMD3DNOW(cpu_flags)) -dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_3dnow; -#endif /* ARCH_X86_32 */ if (EXTERNAL_SSE(cpu_flags)) dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_sse; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avutil/x86/float_dsp: Remove obsolete 3dnowext function
ffmpeg | branch: master | Andreas Rheinhardt | Sun Jun 12 00:40:09 2022 +0200| [2718a3be1f8867fd4f6cb3f452d6917838b1ed88] | committer: Andreas Rheinhardt avutil/x86/float_dsp: Remove obsolete 3dnowext function x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT, SSE and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2). So given that the only systems which benefit from ff_vector_fmul_window_3dnowext are truely ancient 32bit AMD x86s it is removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2718a3be1f8867fd4f6cb3f452d6917838b1ed88 --- libavutil/x86/float_dsp.asm| 25 + libavutil/x86/float_dsp_init.c | 5 - 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm index b773e61a64..cca4d019c7 100644 --- a/libavutil/x86/float_dsp.asm +++ b/libavutil/x86/float_dsp.asm @@ -294,7 +294,7 @@ VECTOR_DMUL_SCALAR ; vector_fmul_window(float *dst, const float *src0, ;const float *src1, const float *win, int len); ;- -%macro VECTOR_FMUL_WINDOW 0 +INIT_XMM sse cglobal vector_fmul_window, 5, 6, 6, dst, src0, src1, win, len, len1 shl lend, 2 lealen1q, [lenq - mmsize] @@ -305,7 +305,6 @@ cglobal vector_fmul_window, 5, 6, 6, dst, src0, src1, win, len, len1 .loop: mova m0, [winq + lenq] mova m4, [src0q + lenq] -%if cpuflag(sse) mova m1, [winq + len1q] mova m5, [src1q + len1q] shufpsm1, m1, 0x1b @@ -319,34 +318,12 @@ cglobal vector_fmul_window, 5, 6, 6, dst, src0, src1, win, len, len1 addps m2, m3 subps m1, m0 shufpsm2, m2, 0x1b -%else -pswapdm1, [winq + len1q] -pswapdm5, [src1q + len1q] -mova m2, m0 -mova m3, m1 -pfmul m2, m4 -pfmul m3, m5 -pfmul m1, m4 -pfmul m0, m5 -pfadd m2, m3 -pfsub m1, m0 -pswapdm2, m2 -%endif mova [dstq + lenq], m1 mova [dstq + len1q], m2 sub len1q, mmsize add lenq, mmsize jl .loop -%if mmsize == 8 -femms -%endif REP_RET -%endmacro - -INIT_MMX 3dnowext -VECTOR_FMUL_WINDOW 
-INIT_XMM sse -VECTOR_FMUL_WINDOW ;- ; vector_fmul_add(float *dst, const float *src0, const float *src1, diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c index 8826e4e2c9..ad17bc2044 100644 --- a/libavutil/x86/float_dsp_init.c +++ b/libavutil/x86/float_dsp_init.c @@ -56,8 +56,6 @@ void ff_vector_dmul_scalar_sse2(double *dst, const double *src, void ff_vector_dmul_scalar_avx(double *dst, const double *src, double mul, int len); -void ff_vector_fmul_window_3dnowext(float *dst, const float *src0, -const float *src1, const float *win, int len); void ff_vector_fmul_window_sse(float *dst, const float *src0, const float *src1, const float *win, int len); @@ -83,9 +81,6 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) { int cpu_flags = av_get_cpu_flags(); -if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) { -fdsp->vector_fmul_window = ff_vector_fmul_window_3dnowext; -} if (EXTERNAL_SSE(cpu_flags)) { fdsp->vector_fmul = ff_vector_fmul_sse; fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/videodsp: Remove obsolete MMX, 3dnow, SSE functions
ffmpeg | branch: master | Andreas Rheinhardt | Sun Jun 12 02:13:39 2022 +0200| [19abc4c0a9ee5e45b630d7ca9815e8d0723a24e2] | committer: Andreas Rheinhardt avcodec/x86/videodsp: Remove obsolete MMX, 3dnow, SSE functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT, SSE and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2). So given that the only systems which benefit from these functions are truely ancient 32bit x86s they are removed. Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=19abc4c0a9ee5e45b630d7ca9815e8d0723a24e2 --- libavcodec/x86/videodsp.asm| 38 ++ libavcodec/x86/videodsp_init.c | 71 -- 2 files changed, 3 insertions(+), 106 deletions(-) diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm index e237860700..b19a8300c5 100644 --- a/libavcodec/x86/videodsp.asm +++ b/libavcodec/x86/videodsp.asm @@ -45,7 +45,6 @@ SECTION .text jnz .%1_y_loop %endmacro -%macro vvar_fn 0 ; .. 
<- zero ; ||<- top is copied from first line in body of source ; || <- start_y @@ -53,6 +52,7 @@ SECTION .text ; || <- end_y ; ||<- bottom is copied from last line in body of source ; '' <- bh +INIT_XMM sse %if ARCH_X86_64 cglobal emu_edge_vvar, 7, 8, 1, dst, dst_stride, src, src_stride, \ start_y, end_y, bh, w @@ -81,15 +81,6 @@ cglobal emu_edge_vvar, 1, 6, 1, dst, src, start_y, end_y, bh, w V_COPY_ROW bottom, bhq; v_copy_row(bottom, bh) .end: ; } RET -%endmacro - -%if ARCH_X86_32 -INIT_MMX mmx -vvar_fn -%endif - -INIT_XMM sse -vvar_fn %macro hvar_fn 0 cglobal emu_edge_hvar, 5, 6, 1, dst, dst_stride, start_x, n_words, h, w @@ -105,11 +96,7 @@ cglobal emu_edge_hvar, 5, 6, 1, dst, dst_stride, start_x, n_words, h, w imul wd, 0x01010101 ; w *= 0x01010101 movd m0, wd mov wq, n_wordsq ; initialize w -%if cpuflag(sse2) pshufd m0, m0, q ; splat -%else ; mmx -punpckldqm0, m0 ; splat -%endif ; mmx/sse %endif ; avx2 .x_loop:; do { movu[dstq+wq*2], m0 ; write($reg, $mmsize) @@ -123,11 +110,6 @@ cglobal emu_edge_hvar, 5, 6, 1, dst, dst_stride, start_x, n_words, h, w RET %endmacro -%if ARCH_X86_32 -INIT_MMX mmx -hvar_fn -%endif - INIT_XMM sse2 hvar_fn @@ -338,9 +320,6 @@ cglobal emu_edge_vfix %+ %%n, 1, 5, 1, dst, src, start_y, end_y, bh INIT_MMX mmx VERTICAL_EXTEND 1, 15 -%if ARCH_X86_32 -VERTICAL_EXTEND 16, 22 -%endif INIT_XMM sse VERTICAL_EXTEND 16, 22 @@ -438,9 +417,6 @@ cglobal emu_edge_hfix %+ %%n, 4, 5, 1, dst, dst_stride, start_x, bh, val INIT_MMX mmx H_EXTEND 2, 14 -%if ARCH_X86_32 -H_EXTEND 16, 22 -%endif INIT_XMM sse2 H_EXTEND 16, 22 @@ -450,19 +426,11 @@ INIT_XMM avx2 H_EXTEND 8, 22 %endif -%macro PREFETCH_FN 1 +INIT_MMX mmxext cglobal prefetch, 3, 3, 0, buf, stride, h .loop: -%1 [bufq] +prefetcht0 [bufq] add bufq, strideq dechd jg .loop REP_RET -%endmacro - -INIT_MMX mmxext -PREFETCH_FN prefetcht0 -%if ARCH_X86_32 -INIT_MMX 3dnow -PREFETCH_FN prefetch -%endif diff --git a/libavcodec/x86/videodsp_init.c b/libavcodec/x86/videodsp_init.c index 961424aa13..a14c9635fb 
100644 --- a/libavcodec/x86/videodsp_init.c +++ b/libavcodec/x86/videodsp_init.c @@ -52,26 +52,6 @@ extern emu_edge_vfix_func ff_emu_edge_vfix12_mmx; extern emu_edge_vfix_func ff_emu_edge_vfix13_mmx; extern emu_edge_vfix_func ff_emu_edge_vfix14_mmx; extern emu_edge_vfix_func ff_emu_edge_vfix15_mmx; -extern emu_edge_vfix_func ff_emu_edge_vfix16_mmx; -extern emu_edge_vfix_func ff_emu_edge_vfix17_mmx; -extern emu_edge_vfix_func ff_emu_edge_vfix18_mmx; -extern emu_edge_vfix_func ff_emu_edge_vfix19_mmx; -extern emu_edge_vfix_func ff_emu_edge_vfix20_mmx; -extern emu_edge_vfix_func ff_emu_edge_vfix21_mmx; -extern emu_edge_vfix_func ff_emu_edge_vfix22_mmx; -#if ARCH_X86_32 -static emu_edge_vfix_func * const vfixtbl_mmx[22] = { -&ff_emu_edge_vfix1_mmx, &ff_emu_edge_vfix2_mmx, &ff_emu_edge_vfix3_mmx, -&ff_emu_edge_vfix4_mmx, &ff_emu_edge_vfix5_mmx, &ff_emu_edge_vfix6_mmx, -&ff_emu_edge_vfix7_mmx, &ff_emu_edge_vfix8_mmx, &ff_emu_edge_vfix9_mmx, -&ff_emu_edge_vfix10_mmx, &ff_emu_edge_vfix11_mmx, &ff_emu_edge_vfix12_mmx, -&ff_emu_edge_vfix13_mmx, &ff_emu_edge_vfix14_mmx, &ff_emu_edge_vfix15_mmx, -&ff_emu_edge_vfix16_mmx, &ff_emu_edge_vfix17_mmx, &ff_emu_edge_vfix18_mmx, -&ff_emu_edge_vfix19_mmx, &ff_emu_edge_vfix20_mmx, &ff_emu_edge_vfix21_mmx, -&ff_emu_edge_vfix22_mmx -}; -#endif -extern emu_edge_vvar_func ff_emu_edge_vvar_mmx; extern emu_edge_vfix_func f
[FFmpeg-cvslog] avfilter/x86/vf_yadif: Remove obsolete MMXEXT functions
ffmpeg | branch: master | Andreas Rheinhardt | Mon Jun 13 08:34:13 2022 +0200| [4d7128be9a31c7bf6cb79436711ded3cc9767fe8] | committer: Andreas Rheinhardt avfilter/x86/vf_yadif: Remove obsolete MMXEXT functions The only system which benefit from these are truely ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4d7128be9a31c7bf6cb79436711ded3cc9767fe8 --- libavfilter/x86/vf_yadif.asm| 8 libavfilter/x86/vf_yadif_init.c | 21 - libavfilter/x86/yadif-10.asm| 4 libavfilter/x86/yadif-16.asm| 4 4 files changed, 37 deletions(-) diff --git a/libavfilter/x86/vf_yadif.asm b/libavfilter/x86/vf_yadif.asm index a29620ce55..809cebdd3f 100644 --- a/libavfilter/x86/vf_yadif.asm +++ b/libavfilter/x86/vf_yadif.asm @@ -133,12 +133,8 @@ SECTION .text psubusb m2, m3 psubusb m3, m4 pmaxub m2, m3 -%if mmsize == 16 mova m3, m2 psrldq m3, 2 -%else -pshufw m3, m2, q0021 -%endif punpcklbwm2, m7 punpcklbwm3, m7 paddwm0, m2 @@ -237,7 +233,3 @@ INIT_XMM ssse3 YADIF INIT_XMM sse2 YADIF -%if ARCH_X86_32 -INIT_MMX mmxext -YADIF -%endif diff --git a/libavfilter/x86/vf_yadif_init.c b/libavfilter/x86/vf_yadif_init.c index 66cbee8510..257c3f9199 100644 --- a/libavfilter/x86/vf_yadif_init.c +++ b/libavfilter/x86/vf_yadif_init.c @@ -23,9 +23,6 @@ #include "libavutil/x86/cpu.h" #include "libavfilter/yadif.h" -void ff_yadif_filter_line_mmxext(void *dst, void *prev, void *cur, - void *next, int w, int prefs, - int mrefs, int parity, int mode); void ff_yadif_filter_line_sse2(void *dst, void *prev, void *cur, void *next, int w, int prefs, int mrefs, int parity, int mode); @@ -33,9 +30,6 @@ void ff_yadif_filter_line_ssse3(void *dst, void *prev, void *cur, void *next, int w, int prefs, int mrefs, int parity, int mode); -void ff_yadif_filter_line_16bit_mmxext(void *dst, void *prev, void *cur, - void *next, int w, int 
prefs, - int mrefs, int parity, int mode); void ff_yadif_filter_line_16bit_sse2(void *dst, void *prev, void *cur, void *next, int w, int prefs, int mrefs, int parity, int mode); @@ -46,9 +40,6 @@ void ff_yadif_filter_line_16bit_sse4(void *dst, void *prev, void *cur, void *next, int w, int prefs, int mrefs, int parity, int mode); -void ff_yadif_filter_line_10bit_mmxext(void *dst, void *prev, void *cur, - void *next, int w, int prefs, - int mrefs, int parity, int mode); void ff_yadif_filter_line_10bit_sse2(void *dst, void *prev, void *cur, void *next, int w, int prefs, int mrefs, int parity, int mode); @@ -63,10 +54,6 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif) : yadif->csp->comp[0].depth; if (bit_depth >= 15) { -#if ARCH_X86_32 -if (EXTERNAL_MMXEXT(cpu_flags)) -yadif->filter_line = ff_yadif_filter_line_16bit_mmxext; -#endif /* ARCH_X86_32 */ if (EXTERNAL_SSE2(cpu_flags)) yadif->filter_line = ff_yadif_filter_line_16bit_sse2; if (EXTERNAL_SSSE3(cpu_flags)) @@ -74,19 +61,11 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif) if (EXTERNAL_SSE4(cpu_flags)) yadif->filter_line = ff_yadif_filter_line_16bit_sse4; } else if ( bit_depth >= 9 && bit_depth <= 14) { -#if ARCH_X86_32 -if (EXTERNAL_MMXEXT(cpu_flags)) -yadif->filter_line = ff_yadif_filter_line_10bit_mmxext; -#endif /* ARCH_X86_32 */ if (EXTERNAL_SSE2(cpu_flags)) yadif->filter_line = ff_yadif_filter_line_10bit_sse2; if (EXTERNAL_SSSE3(cpu_flags)) yadif->filter_line = ff_yadif_filter_line_10bit_ssse3; } else { -#if ARCH_X86_32 -if (EXTERNAL_MMXEXT(cpu_flags)) -yadif->filter_line = ff_yadif_filter_line_mmxext; -#endif /* ARCH_X86_32 */ if (EXTERNAL_SSE2(cpu_flags)) yadif->filter_line = ff_yadif_filter_line_sse2; if (EXTERNAL_SSSE3(cpu_flags)) diff --git a/libavfilter/x86/yadif-10.asm b/libavfilter/x86/yadif-10.asm index 8853e0d2c7..c6952db64b 100644 --- a/libavfilter/x86/yadif-10.asm +++ b/libavfilter/x86/yadif-10.asm @@ -249,7 +249,3 @@ INIT_XMM ssse3 YADIF INIT_XMM sse2 YADIF -%if ARCH_X86_32 -INIT_MMX 
mmxext -
[FFmpeg-cvslog] avfilter/x86/vf_idet: Remove obsolete MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Mon Jun 13 08:39:57 2022 +0200| [7c3c1d938f2e01bd607deb814706f67438e85b7a] | committer: Andreas Rheinhardt avfilter/x86/vf_idet: Remove obsolete MMX(EXT) functions The only system which benefit from these are truely ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7c3c1d938f2e01bd607deb814706f67438e85b7a --- libavfilter/x86/vf_idet.asm| 58 -- libavfilter/x86/vf_idet_init.c | 16 +--- 2 files changed, 1 insertion(+), 73 deletions(-) diff --git a/libavfilter/x86/vf_idet.asm b/libavfilter/x86/vf_idet.asm index 9596abd7e2..7bc8e7d2c4 100644 --- a/libavfilter/x86/vf_idet.asm +++ b/libavfilter/x86/vf_idet.asm @@ -25,60 +25,6 @@ SECTION .text -; Implementation that does 8-bytes at a time using single-word operations. -%macro IDET_FILTER_LINE 1 -INIT_MMX %1 -cglobal idet_filter_line, 4, 5, 0, a, b, c, width, index -xor indexq, indexq -%define m_zero m2 -%define m_sum m5 -pxor m_sum, m_sum -pxor m_zero, m_zero - -.loop: -movu m0, [aq + indexq*1] -punpckhbw m1, m0, m_zero -punpcklbw m0, m_zero - -movu m3, [cq + indexq*1] -punpckhbw m4, m3, m_zero -punpcklbw m3, m_zero - -paddswm1, m4 -paddswm0, m3 - -movu m3, [bq + indexq*1] -punpckhbw m4, m3, m_zero -punpcklbw m3, m_zero - -paddw m4, m4 -paddw m3, m3 -psubswm1, m4 -psubswm0, m3 - -ABS2 m1, m0, m4, m3 - -paddw m0, m1 -punpckhwd m1, m0, m_zero -punpcklwd m0, m_zero - -paddd m0, m1 -paddd m_sum, m0 - -add indexq, 0x8 -CMP widthd, indexd -jg.loop - -HADDD m_sum, m0 -movd eax, m_sum -RET -%endmacro - -%if ARCH_X86_32 -IDET_FILTER_LINE mmxext -IDET_FILTER_LINE mmx -%endif - ;** ; 16bit implementation that does 4/8-pixels at a time @@ -128,10 +74,6 @@ cglobal idet_filter_line_16bit, 4, 5, 8, a, b, c, width, index INIT_XMM sse2 IDET_FILTER_LINE_16BIT 8 -%if ARCH_X86_32 -INIT_MMX mmx 
-IDET_FILTER_LINE_16BIT 4 -%endif ;** ; SSE2 8-bit implementation that does 16-bytes at a time: diff --git a/libavfilter/x86/vf_idet_init.c b/libavfilter/x86/vf_idet_init.c index d4d9bd0893..acb4e2a778 100644 --- a/libavfilter/x86/vf_idet_init.c +++ b/libavfilter/x86/vf_idet_init.c @@ -24,7 +24,7 @@ #if HAVE_X86ASM -/* declares main callable idet_filter_line_{mmx,mmxext,sse2}() */ +/* declares main callable idet_filter_line_sse2() */ #define FUNC_MAIN_DECL(KIND, SPAN)\ int ff_idet_filter_line_##KIND(const uint8_t *a, const uint8_t *b,\ const uint8_t *c, int w); \ @@ -58,11 +58,6 @@ static int idet_filter_line_16bit_##KIND(const uint16_t *a, const uint16_t *b, \ FUNC_MAIN_DECL(sse2, 16) FUNC_MAIN_DECL_16bit(sse2, 8) -#if ARCH_X86_32 -FUNC_MAIN_DECL(mmx, 8) -FUNC_MAIN_DECL(mmxext, 8) -FUNC_MAIN_DECL_16bit(mmx, 4) -#endif #endif av_cold void ff_idet_init_x86(IDETContext *idet, int for_16b) @@ -70,15 +65,6 @@ av_cold void ff_idet_init_x86(IDETContext *idet, int for_16b) #if HAVE_X86ASM const int cpu_flags = av_get_cpu_flags(); -#if ARCH_X86_32 -if (EXTERNAL_MMX(cpu_flags)) { -idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_mmx : idet_filter_line_mmx; -} -if (EXTERNAL_MMXEXT(cpu_flags)) { -idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_mmx : idet_filter_line_mmxext; -} -#endif // ARCH_x86_32 - if (EXTERNAL_SSE2(cpu_flags)) { idet->filter_line = for_16b ? (ff_idet_filter_func)idet_filter_line_16bit_sse2 : idet_filter_line_sse2; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avfilter/x86/vf_bwdif: Remove obsolete MMXEXT functions
ffmpeg | branch: master | Andreas Rheinhardt | Mon Jun 13 08:42:46 2022 +0200| [ed42a51930d9cca6dfed35c4af4b5b3a3f7f6a04] | committer: Andreas Rheinhardt avfilter/x86/vf_bwdif: Remove obsolete MMXEXT functions The only system which benefit from these are truely ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ed42a51930d9cca6dfed35c4af4b5b3a3f7f6a04 --- libavfilter/x86/vf_bwdif.asm| 4 libavfilter/x86/vf_bwdif_init.c | 16 2 files changed, 20 deletions(-) diff --git a/libavfilter/x86/vf_bwdif.asm b/libavfilter/x86/vf_bwdif.asm index 147b7c6ac6..0b453da53b 100644 --- a/libavfilter/x86/vf_bwdif.asm +++ b/libavfilter/x86/vf_bwdif.asm @@ -264,7 +264,3 @@ INIT_XMM ssse3 BWDIF INIT_XMM sse2 BWDIF -%if ARCH_X86_32 -INIT_MMX mmxext -BWDIF -%endif diff --git a/libavfilter/x86/vf_bwdif_init.c b/libavfilter/x86/vf_bwdif_init.c index f632c4f340..e24e5cd9b1 100644 --- a/libavfilter/x86/vf_bwdif_init.c +++ b/libavfilter/x86/vf_bwdif_init.c @@ -24,10 +24,6 @@ #include "libavutil/x86/cpu.h" #include "libavfilter/bwdif.h" -void ff_bwdif_filter_line_mmxext(void *dst, void *prev, void *cur, void *next, - int w, int prefs, int mrefs, int prefs2, - int mrefs2, int prefs3, int mrefs3, int prefs4, - int mrefs4, int parity, int clip_max); void ff_bwdif_filter_line_sse2(void *dst, void *prev, void *cur, void *next, int w, int prefs, int mrefs, int prefs2, int mrefs2, int prefs3, int mrefs3, int prefs4, @@ -37,10 +33,6 @@ void ff_bwdif_filter_line_ssse3(void *dst, void *prev, void *cur, void *next, int mrefs2, int prefs3, int mrefs3, int prefs4, int mrefs4, int parity, int clip_max); -void ff_bwdif_filter_line_12bit_mmxext(void *dst, void *prev, void *cur, void *next, - int w, int prefs, int mrefs, int prefs2, - int mrefs2, int prefs3, int mrefs3, int prefs4, - int mrefs4, int parity, int clip_max); 
void ff_bwdif_filter_line_12bit_sse2(void *dst, void *prev, void *cur, void *next, int w, int prefs, int mrefs, int prefs2, int mrefs2, int prefs3, int mrefs3, int prefs4, @@ -57,19 +49,11 @@ av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif) int bit_depth = (!yadif->csp) ? 8 : yadif->csp->comp[0].depth; if (bit_depth <= 8) { -#if ARCH_X86_32 -if (EXTERNAL_MMXEXT(cpu_flags)) -bwdif->filter_line = ff_bwdif_filter_line_mmxext; -#endif /* ARCH_X86_32 */ if (EXTERNAL_SSE2(cpu_flags)) bwdif->filter_line = ff_bwdif_filter_line_sse2; if (EXTERNAL_SSSE3(cpu_flags)) bwdif->filter_line = ff_bwdif_filter_line_ssse3; } else if (bit_depth <= 12) { -#if ARCH_X86_32 -if (EXTERNAL_MMXEXT(cpu_flags)) -bwdif->filter_line = ff_bwdif_filter_line_12bit_mmxext; -#endif /* ARCH_X86_32 */ if (EXTERNAL_SSE2(cpu_flags)) bwdif->filter_line = ff_bwdif_filter_line_12bit_sse2; if (EXTERNAL_SSSE3(cpu_flags)) ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/dct32: Remove obsolete SSE function
ffmpeg | branch: master | Andreas Rheinhardt | Tue Jun 14 20:26:16 2022 +0200| [54784ffac523c6fbc50762a91b3dcc481933b0b0] | committer: Andreas Rheinhardt avcodec/x86/dct32: Remove obsolete SSE function The only systems which benefit from ff_dct32_float_sse are truly ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=54784ffac523c6fbc50762a91b3dcc481933b0b0 --- libavcodec/x86/dct32.asm | 12 +--- libavcodec/x86/dct_init.c | 5 - 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/libavcodec/x86/dct32.asm b/libavcodec/x86/dct32.asm index 21e2f21c97..37fba51543 100644 --- a/libavcodec/x86/dct32.asm +++ b/libavcodec/x86/dct32.asm @@ -387,7 +387,7 @@ INIT_XMM %endif -; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in) +; void ff_dct32_float(FFTSample *out, const FFTSample *in) %macro DCT32_FUNC 0 cglobal dct32_float, 2, 3, 16, out, in, tmp ; pass 1 @@ -474,18 +474,8 @@ cglobal dct32_float, 2, 3, 16, out, in, tmp %endmacro %macro LOAD_INV 2 -%if cpuflag(sse2) pshufd %1, %2, 0x1b -%elif cpuflag(sse) -movaps %1, %2 -shufps %1, %1, 0x1b -%endif %endmacro -%if ARCH_X86_32 -INIT_XMM sse -DCT32_FUNC -%endif - INIT_XMM sse2 DCT32_FUNC diff --git a/libavcodec/x86/dct_init.c b/libavcodec/x86/dct_init.c index c31ef92238..d0e4b34dd3 100644 --- a/libavcodec/x86/dct_init.c +++ b/libavcodec/x86/dct_init.c @@ -22,7 +22,6 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/dct.h" -void ff_dct32_float_sse(FFTSample *out, const FFTSample *in); void ff_dct32_float_sse2(FFTSample *out, const FFTSample *in); void ff_dct32_float_avx(FFTSample *out, const FFTSample *in); @@ -30,10 +29,6 @@ av_cold void ff_dct_init_x86(DCTContext *s) { int cpu_flags = av_get_cpu_flags(); -#if ARCH_X86_32 -if (EXTERNAL_SSE(cpu_flags)) -s->dct32 = ff_dct32_float_sse; -#endif if 
(EXTERNAL_SSE2(cpu_flags)) s->dct32 = ff_dct32_float_sse2; if (EXTERNAL_AVX_FAST(cpu_flags)) ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/vp3dsp: Remove obsolete MMX functions
ffmpeg | branch: master | Andreas Rheinhardt | Mon Jun 13 16:57:39 2022 +0200| [eefec0663406d7c2749a280f5244caaacb069c60] | committer: Andreas Rheinhardt avcodec/x86/vp3dsp: Remove obsolete MMX functions The only system which benefit from these are truely ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=eefec0663406d7c2749a280f5244caaacb069c60 --- libavcodec/x86/vp3dsp.asm| 62 libavcodec/x86/vp3dsp_init.c | 7 - 2 files changed, 69 deletions(-) diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm index d88d5a1edf..005ecbc9a0 100644 --- a/libavcodec/x86/vp3dsp.asm +++ b/libavcodec/x86/vp3dsp.asm @@ -571,40 +571,25 @@ cglobal vp3_idct_put, 3, 4, 9 mova m1, [r2+mmsize*2+%%i] mova m2, [r2+mmsize*4+%%i] mova m3, [r2+mmsize*6+%%i] -%if mmsize == 8 -packsswb m0, [r2+mmsize*8+%%i] -packsswb m1, [r2+mmsize*10+%%i] -packsswb m2, [r2+mmsize*12+%%i] -packsswb m3, [r2+mmsize*14+%%i] -%else packsswb m0, [r2+mmsize*1+%%i] packsswb m1, [r2+mmsize*3+%%i] packsswb m2, [r2+mmsize*5+%%i] packsswb m3, [r2+mmsize*7+%%i] -%endif paddb m0, m4 paddb m1, m4 paddb m2, m4 paddb m3, m4 movq [r0 ], m0 -%if mmsize == 8 -movq [r0+r1 ], m1 -movq [r0+r1*2], m2 -movq [r0+r3 ], m3 -%else movhps [r0+r1 ], m0 movq [r0+r1*2], m1 movhps [r0+r3 ], m1 -%endif %if %%i == 0 lea r0, [r0+r1*4] %endif -%if mmsize == 16 movq [r0 ], m2 movhps [r0+r1 ], m2 movq [r0+r1*2], m3 movhps [r0+r3 ], m3 -%endif %assign %%i %%i+8 %endrep @@ -621,7 +606,6 @@ cglobal vp3_idct_add, 3, 4, 9 lea r3, [r1*3] pxor m4, m4 -%if mmsize == 16 %assign %%i 0 %rep 2 movq m0, [r0] @@ -647,47 +631,6 @@ cglobal vp3_idct_add, 3, 4, 9 %endif %assign %%i %%i+64 %endrep -%else -%assign %%i 0 -%rep 2 -movq m0, [r0] -movq m1, [r0+r1] -movq m2, [r0+r1*2] -movq m3, [r0+r3] -movq m5, m0 -movq m6, m1 -movq m7, m2 -punpcklbw m0, m4 -punpcklbw m1, 
m4 -punpcklbw m2, m4 -punpckhbw m5, m4 -punpckhbw m6, m4 -punpckhbw m7, m4 -paddswm0, [r2+ 0+%%i] -paddswm1, [r2+16+%%i] -paddswm2, [r2+32+%%i] -paddswm5, [r2+64+%%i] -paddswm6, [r2+80+%%i] -paddswm7, [r2+96+%%i] -packuswb m0, m5 -movq m5, m3 -punpcklbw m3, m4 -punpckhbw m5, m4 -packuswb m1, m6 -paddswm3, [r2+48+%%i] -paddswm5, [r2+112+%%i] -packuswb m2, m7 -packuswb m3, m5 -movq [r0 ], m0 -movq [r0+r1 ], m1 -movq [r0+r1*2], m2 -movq [r0+r3 ], m3 -%if %%i == 0 -lea r0, [r0+r1*4] -%endif -%assign %%i %%i+8 -%endrep -%endif %assign %%i 0 %rep 128/mmsize mova[r2+%%i], m4 @@ -696,11 +639,6 @@ cglobal vp3_idct_add, 3, 4, 9 RET %endmacro -%if ARCH_X86_32 -INIT_MMX mmx -vp3_idct_funcs -%endif - INIT_XMM sse2 vp3_idct_funcs diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c index ba47e1c6cd..f54fa57b3e 100644 --- a/libavcodec/x86/vp3dsp_init.c +++ b/libavcodec/x86/vp3dsp_init.c @@ -26,9 +26,6 @@ #include "libavcodec/avcodec.h" #include "libavcodec/vp3dsp.h" -void ff_vp3_idct_put_mmx(uint8_t *dest, ptrdiff_t stride, int16_t *block); -void ff_vp3_idct_add_mmx(uint8_t *dest, ptrdiff_t stride, int16_t *block); - void ff_vp3_idct_put_sse2(uint8_t *dest, ptrdiff_t stride, int16_t *block); void ff_vp3_idct_add_sse2(uint8_t *dest, ptrdiff_t stride, int16_t *block); @@ -49,10 +46,6 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) if (EXTERNAL_MMX(cpu_flags)) { c->put_no_rnd_pixels_l2 = ff_put_vp_no_rnd_pixels8_l2_mmx; -#if ARCH_X86_32 -c->idct_put = ff_vp3_idct_put_mmx; -c->idct_add = ff_vp3_idct_add_mmx; -#endif } if (EXTERNAL_MMXEXT(cpu_flags)) { ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/mpegaudiodsp: Remove obsolete SSE function
ffmpeg | branch: master | Andreas Rheinhardt | Tue Jun 14 20:21:03 2022 +0200| [25e39f8c727190520e1274be9bf0b4b3302587f9] | committer: Andreas Rheinhardt avcodec/x86/mpegaudiodsp: Remove obsolete SSE function The only systems which benefit from imdct36_blocks_sse are truly ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=25e39f8c727190520e1274be9bf0b4b3302587f9 --- libavcodec/x86/imdct36.asm| 5 - libavcodec/x86/mpegaudiodsp.c | 11 --- 2 files changed, 16 deletions(-) diff --git a/libavcodec/x86/imdct36.asm b/libavcodec/x86/imdct36.asm index b386ab95fc..888c6bf4d6 100644 --- a/libavcodec/x86/imdct36.asm +++ b/libavcodec/x86/imdct36.asm @@ -373,11 +373,6 @@ cglobal imdct36_float, 4,4,9, out, buf, in, win RET %endmacro -%if ARCH_X86_32 -INIT_XMM sse -DEFINE_IMDCT -%endif - INIT_XMM sse2 DEFINE_IMDCT diff --git a/libavcodec/x86/mpegaudiodsp.c b/libavcodec/x86/mpegaudiodsp.c index dcea94a1f5..6586fe0726 100644 --- a/libavcodec/x86/mpegaudiodsp.c +++ b/libavcodec/x86/mpegaudiodsp.c @@ -34,9 +34,6 @@ static void imdct36_blocks_ ## CPU(float *out, float *buf, float *in, int count, void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win); #if HAVE_X86ASM -#if ARCH_X86_32 -DECL(sse) -#endif DECL(sse2) DECL(sse3) DECL(ssse3) @@ -230,9 +227,6 @@ static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in, \ } #if HAVE_SSE -#if ARCH_X86_32 -DECL_IMDCT_BLOCKS(sse,sse) -#endif DECL_IMDCT_BLOCKS(sse2,sse) DECL_IMDCT_BLOCKS(sse3,sse) DECL_IMDCT_BLOCKS(ssse3,sse) @@ -271,11 +265,6 @@ av_cold void ff_mpadsp_init_x86(MPADSPContext *s) #if HAVE_X86ASM #if HAVE_SSE -#if ARCH_X86_32 -if (EXTERNAL_SSE(cpu_flags)) { -s->imdct36_blocks_float = imdct36_blocks_sse; -} -#endif if (EXTERNAL_SSE2(cpu_flags)) { s->imdct36_blocks_float = imdct36_blocks_sse2; } ___ 
ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/synth_filter: Remove obsolete SSE function
ffmpeg | branch: master | Andreas Rheinhardt | Tue Jun 14 20:31:03 2022 +0200| [eb33fd384e70900644b5c1a06e266819af32b02e] | committer: Andreas Rheinhardt avcodec/x86/synth_filter: Remove obsolete SSE function The only systems which benefit from synth_filter_sse are truly ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=eb33fd384e70900644b5c1a06e266819af32b02e --- libavcodec/x86/synth_filter.asm| 6 +- libavcodec/x86/synth_filter_init.c | 8 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/libavcodec/x86/synth_filter.asm b/libavcodec/x86/synth_filter.asm index bc1a48f409..22f57c3309 100644 --- a/libavcodec/x86/synth_filter.asm +++ b/libavcodec/x86/synth_filter.asm @@ -115,7 +115,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ synth_buf, synth_buf2, window, out, off, scale %define scale m0 %if ARCH_X86_32 || WIN64 -%if cpuflag(sse2) && notcpuflag(avx) +%if notcpuflag(avx) movd scale, scalem SPLATDm0 %else @@ -234,10 +234,6 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ RET %endmacro -%if ARCH_X86_32 -INIT_XMM sse -SYNTH_FILTER -%endif INIT_XMM sse2 SYNTH_FILTER INIT_YMM avx diff --git a/libavcodec/x86/synth_filter_init.c b/libavcodec/x86/synth_filter_init.c index 35e2b47a3e..7c76ac8d05 100644 --- a/libavcodec/x86/synth_filter_init.c +++ b/libavcodec/x86/synth_filter_init.c @@ -43,9 +43,6 @@ static void synth_filter_##opt(FFTContext *imdct, \ } \ #if HAVE_X86ASM -#if ARCH_X86_32 -SYNTH_FILTER_FUNC(sse) -#endif SYNTH_FILTER_FUNC(sse2) SYNTH_FILTER_FUNC(avx) SYNTH_FILTER_FUNC(fma3) @@ -56,11 +53,6 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) #if HAVE_X86ASM int cpu_flags = av_get_cpu_flags(); -#if ARCH_X86_32 -if (EXTERNAL_SSE(cpu_flags)) { -s->synth_filter_float = synth_filter_sse; -} 
-#endif if (EXTERNAL_SSE2(cpu_flags)) { s->synth_filter_float = synth_filter_sse2; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/vp6dsp: Remove obsolete MMX ff_vp6_filter_diag4_mmx
ffmpeg | branch: master | Andreas Rheinhardt | Mon Jun 13 17:02:42 2022 +0200| [6cb3ee80b3b58d692a722fb38ee05f170ae8b0d2] | committer: Andreas Rheinhardt avcodec/x86/vp6dsp: Remove obsolete MMX ff_vp6_filter_diag4_mmx The only systems which benefit from it are truely ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6cb3ee80b3b58d692a722fb38ee05f170ae8b0d2 --- libavcodec/x86/vp6dsp.asm| 16 +--- libavcodec/x86/vp6dsp_init.c | 7 --- 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/libavcodec/x86/vp6dsp.asm b/libavcodec/x86/vp6dsp.asm index 0be531e5c2..512fe89def 100644 --- a/libavcodec/x86/vp6dsp.asm +++ b/libavcodec/x86/vp6dsp.asm @@ -114,18 +114,13 @@ SECTION .text %endif ; mmsize == 8/16 %endmacro -%macro vp6_filter_diag4 0 ; void ff_vp6_filter_diag4_(uint8_t *dst, uint8_t *src, ptrdiff_t stride, ;const int16_t h_weight[4], const int16_t v_weights[4]) +INIT_XMM sse2 cglobal vp6_filter_diag4, 5, 7, 8 mov r5, rsp ; backup stack pointer and rsp, ~(mmsize-1) ; align stack -%if mmsize == 16 sub rsp, 8*11 -%else -sub rsp, 8*15 -movq m6, [pw_64] -%endif sub r1, r2 @@ -156,12 +151,3 @@ cglobal vp6_filter_diag4, 5, 7, 8 mov rsp, r5 ; restore stack pointer RET -%endmacro - -%if ARCH_X86_32 -INIT_MMX mmx -vp6_filter_diag4 -%endif - -INIT_XMM sse2 -vp6_filter_diag4 diff --git a/libavcodec/x86/vp6dsp_init.c b/libavcodec/x86/vp6dsp_init.c index ce498931d0..83d45ec36c 100644 --- a/libavcodec/x86/vp6dsp_init.c +++ b/libavcodec/x86/vp6dsp_init.c @@ -25,8 +25,6 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/vp56dsp.h" -void ff_vp6_filter_diag4_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride, - const int16_t *h_weights,const int16_t *v_weights); void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, const int16_t *h_weights,const int16_t *v_weights); 
@@ -34,11 +32,6 @@ av_cold void ff_vp6dsp_init_x86(VP56DSPContext *c) { int cpu_flags = av_get_cpu_flags(); -#if ARCH_X86_32 -if (EXTERNAL_MMX(cpu_flags)) { -c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; -} -#endif if (EXTERNAL_SSE2(cpu_flags)) { c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/rv34dsp: Remove obsolete MMX function
ffmpeg | branch: master | Andreas Rheinhardt | Tue Jun 14 20:34:24 2022 +0200| [8360354ae81982d6510fa54979c23f714b0790e2] | committer: Andreas Rheinhardt avcodec/x86/rv34dsp: Remove obsolete MMX function The only systems which benefit from ff_rv34_idct_dc_add_mmx are truly ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8360354ae81982d6510fa54979c23f714b0790e2 --- libavcodec/x86/rv34dsp.asm| 37 - libavcodec/x86/rv34dsp_init.c | 3 --- 2 files changed, 40 deletions(-) diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm index 5568ddfdf8..0a3d99c53f 100644 --- a/libavcodec/x86/rv34dsp.asm +++ b/libavcodec/x86/rv34dsp.asm @@ -56,43 +56,6 @@ cglobal rv34_idct_dc_noround, 1, 2, 0 movq[r0+24], m0 REP_RET -; ff_rv34_idct_dc_add_mmx(uint8_t *dst, int stride, int dc); -%if ARCH_X86_32 -INIT_MMX mmx -cglobal rv34_idct_dc_add, 3, 3 -; calculate DC -IDCT_DC_ROUND r2 -pxor m1, m1 -movd m0, r2d -psubw m1, m0 -packuswb m0, m0 -packuswb m1, m1 -punpcklbw m0, m0 -punpcklbw m1, m1 -punpcklwd m0, m0 -punpcklwd m1, m1 - -; add DC -lear2, [r0+r1*2] -movh m2, [r0] -movh m3, [r0+r1] -movh m4, [r2] -movh m5, [r2+r1] -paddusbm2, m0 -paddusbm3, m0 -paddusbm4, m0 -paddusbm5, m0 -psubusbm2, m1 -psubusbm3, m1 -psubusbm4, m1 -psubusbm5, m1 -movh [r0], m2 -movh [r0+r1], m3 -movh [r2], m4 -movh [r2+r1], m5 -RET -%endif - ; Load coeffs and perform row transform ; Output: coeffs in mm[0467], rounder in mm5 %macro ROW_TRANSFORM 1 diff --git a/libavcodec/x86/rv34dsp_init.c b/libavcodec/x86/rv34dsp_init.c index 7310122458..caa5c2d653 100644 --- a/libavcodec/x86/rv34dsp_init.c +++ b/libavcodec/x86/rv34dsp_init.c @@ -26,7 +26,6 @@ void ff_rv34_idct_dc_mmxext(int16_t *block); void ff_rv34_idct_dc_noround_mmxext(int16_t *block); -void ff_rv34_idct_dc_add_mmx(uint8_t *dst, ptrdiff_t stride, int 
dc); void ff_rv34_idct_dc_add_sse2(uint8_t *dst, ptrdiff_t stride, int dc); void ff_rv34_idct_dc_add_sse4(uint8_t *dst, ptrdiff_t stride, int dc); void ff_rv34_idct_add_mmxext(uint8_t *dst, ptrdiff_t stride, int16_t *block); @@ -35,8 +34,6 @@ av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c) { int cpu_flags = av_get_cpu_flags(); -if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) -c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx; if (EXTERNAL_MMXEXT(cpu_flags)) { c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmxext; c->rv34_idct_add = ff_rv34_idct_add_mmxext; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/dcadsp: Remove obsolete SSE function
ffmpeg | branch: master | Andreas Rheinhardt | Mon Jun 20 03:07:41 2022 +0200| [61e3cccd367a1daf4aedffa65f5be038aa5cebe1] | committer: Andreas Rheinhardt avcodec/x86/dcadsp: Remove obsolete SSE function The only systems which benefit from ff_lfe_fir0_float_sse are truly ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=61e3cccd367a1daf4aedffa65f5be038aa5cebe1 --- libavcodec/x86/dcadsp.asm| 17 ++--- libavcodec/x86/dcadsp_init.c | 3 --- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm index 055361a765..de9fd6f346 100644 --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -42,22 +42,13 @@ cglobal lfe_fir0_float, 4, 6, 12 + cpuflag(fma3)*4, samples, lfe, coeff, nblocks cvtdq2ps m5, [lfeq ] shufpsm7, m4, m4, q0123 shufpsm6, m5, m5, q0123 -%elif cpuflag(sse2) +%else movu m4, [lfeq+16] movu m5, [lfeq ] cvtdq2ps m4, m4 cvtdq2ps m5, m5 pshufdm7, m4, q0123 pshufdm6, m5, q0123 -%else -cvtpi2ps m4, [lfeq+16] -cvtpi2ps m0, [lfeq+24] -cvtpi2ps m5, [lfeq ] -cvtpi2ps m1, [lfeq+8 ] -shufpsm4, m0, q1010 -shufpsm5, m1, q1010 -shufpsm7, m4, m4, q0123 -shufpsm6, m5, m5, q0123 %endif .inner_loop: @@ -206,10 +197,6 @@ cglobal lfe_fir0_float, 4, 6, 12 + cpuflag(fma3)*4, samples, lfe, coeff, nblocks RET %endmacro -%if ARCH_X86_32 -INIT_XMM sse -LFE_FIR0_FLOAT -%endif INIT_XMM sse2 LFE_FIR0_FLOAT %if HAVE_AVX_EXTERNAL @@ -235,7 +222,7 @@ cglobal lfe_fir1_float, 4, 6, 10, samples, lfe, coeff, nblocks, cnt1, cnt2 %if cpuflag(avx) cvtdq2ps m4, [lfeq] shufpsm5, m4, m4, q0123 -%elif cpuflag(sse2) +%else movu m4, [lfeq] cvtdq2ps m4, m4 pshufdm5, m4, q0123 diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index fc10fb8bc5..0c78dd1c9e 100644 --- a/libavcodec/x86/dcadsp_init.c +++ 
b/libavcodec/x86/dcadsp_init.c @@ -27,7 +27,6 @@ void ff_lfe_fir0_float_##opt(float *pcm_samples, int32_t *lfe_samples, \ void ff_lfe_fir1_float_##opt(float *pcm_samples, int32_t *lfe_samples, \ const float *filter_coeff, ptrdiff_t npcmblocks); -LFE_FIR_FLOAT_FUNC(sse) LFE_FIR_FLOAT_FUNC(sse2) LFE_FIR_FLOAT_FUNC(sse3) LFE_FIR_FLOAT_FUNC(avx) @@ -37,8 +36,6 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s) { int cpu_flags = av_get_cpu_flags(); -if (ARCH_X86_32 && EXTERNAL_SSE(cpu_flags)) -s->lfe_fir_float[0] = ff_lfe_fir0_float_sse; if (EXTERNAL_SSE2(cpu_flags)) s->lfe_fir_float[0] = ff_lfe_fir0_float_sse2; if (EXTERNAL_SSE3(cpu_flags)) ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/huffyuvdsp: Remove obsolete MMX functions
ffmpeg | branch: master | Andreas Rheinhardt | Mon Jun 20 06:41:42 2022 +0200| [4b6ffc2880e33d05ed1ab6bbc38e5a795f14b504] | committer: Andreas Rheinhardt avcodec/x86/huffyuvdsp: Remove obsolete MMX functions The only systems which benefit from these are truly ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4b6ffc2880e33d05ed1ab6bbc38e5a795f14b504 --- libavcodec/x86/huffyuvdsp.asm| 23 +-- libavcodec/x86/huffyuvdsp_init.c | 8 2 files changed, 1 insertion(+), 30 deletions(-) diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm index a1231f1b22..c5c40e991b 100644 --- a/libavcodec/x86/huffyuvdsp.asm +++ b/libavcodec/x86/huffyuvdsp.asm @@ -32,24 +32,15 @@ SECTION .text %macro ADD_INT16 0 cglobal add_int16, 4,4,5, dst, src, mask, w, tmp -%if mmsize > 8 test srcq, mmsize-1 jnz .unaligned test dstq, mmsize-1 jnz .unaligned -%endif INT16_LOOP a, add -%if mmsize > 8 .unaligned: INT16_LOOP u, add -%endif %endmacro -%if ARCH_X86_32 -INIT_MMX mmx -ADD_INT16 -%endif - INIT_XMM sse2 ADD_INT16 @@ -60,7 +51,7 @@ ADD_INT16 ; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src, ; intptr_t w, uint8_t *left) -%macro LEFT_BGR32 0 +INIT_XMM sse2 cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left shl wq, 2 movd m0, [leftq] @@ -71,17 +62,12 @@ cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left .loop: movu m1, [srcq+wq] mova m2, m1 -%if mmsize == 8 -punpckhdq m0, m0 -%endif LSHIFTm1, 4 paddb m1, m2 -%if mmsize == 16 pshufdm0, m0, q mova m2, m1 LSHIFTm1, 8 paddb m1, m2 -%endif paddb m0, m1 movu [dstq+wq], m0 add wq, mmsize @@ -89,14 +75,7 @@ cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left movd m0, [dstq-4] movd [leftq], m0 REP_RET -%endmacro -%if ARCH_X86_32 -INIT_MMX mmx -LEFT_BGR32 -%endif -INIT_XMM sse2 -LEFT_BGR32 ; void 
add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top) INIT_MMX mmxext diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c index eb10de383d..239d3ca313 100644 --- a/libavcodec/x86/huffyuvdsp_init.c +++ b/libavcodec/x86/huffyuvdsp_init.c @@ -26,12 +26,9 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/huffyuvdsp.h" -void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w); void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w); void ff_add_int16_avx2(uint16_t *dst, const uint16_t *src, unsigned mask, int w); -void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src, - intptr_t w, uint8_t *left); void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src, intptr_t w, uint8_t *left); void ff_add_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top); @@ -41,11 +38,6 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c, enum AVPixelFormat pix int cpu_flags = av_get_cpu_flags(); const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(pix_fmt); -if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { -c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_mmx; -c->add_int16 = ff_add_int16_mmx; -} - if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) { c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/vp8dsp: Remove obsolete MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Mon Jun 20 05:55:04 2022 +0200| [6a551f14050674fb685920eb1b0640810cacccf9] | committer: Andreas Rheinhardt avcodec/x86/vp8dsp: Remove obsolete MMX(EXT) functions The only systems which benefit from these are truly ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6a551f14050674fb685920eb1b0640810cacccf9 --- libavcodec/x86/vp8dsp.asm| 119 +--- libavcodec/x86/vp8dsp_init.c | 84 - libavcodec/x86/vp8dsp_loopfilter.asm | 354 +-- 3 files changed, 4 insertions(+), 553 deletions(-) diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index 75de5690a1..1c59e884ed 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -840,25 +840,6 @@ cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height jg .nextrow REP_RET -%if ARCH_X86_32 -INIT_MMX mmx -cglobal put_vp8_pixels16, 5, 5, 0, dst, dststride, src, srcstride, height -.nextrow: -movqmm0, [srcq+srcstrideq*0+0] -movqmm1, [srcq+srcstrideq*0+8] -movqmm2, [srcq+srcstrideq*1+0] -movqmm3, [srcq+srcstrideq*1+8] -leasrcq, [srcq+srcstrideq*2] -movq [dstq+dststrideq*0+0], mm0 -movq [dstq+dststrideq*0+8], mm1 -movq [dstq+dststrideq*1+0], mm2 -movq [dstq+dststrideq*1+8], mm3 -leadstq, [dstq+dststrideq*2] -sub heightd, 2 -jg .nextrow -REP_RET -%endif - INIT_XMM sse cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height .nextrow: @@ -895,32 +876,6 @@ cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height %4 [dst2q+strideq+%3], m5 %endmacro -%if ARCH_X86_32 -INIT_MMX mmx -cglobal vp8_idct_dc_add, 3, 3, 0, dst, block, stride -; load data -movd m0, [blockq] - -; calculate DC -paddw m0, [pw_4] -pxor m1, m1 -psraw m0, 3 -movd [blockq], m1 -psubw m1, m0 -packuswb m0, m0 -packuswb m1, m1 -punpcklbw m0, m0 -punpcklbw m1, m1 
-punpcklwd m0, m0 -punpcklwd m1, m1 - -; add DC -DEFINE_ARGS dst1, dst2, stride -lea dst2q, [dst1q+strideq*2] -ADD_DC m0, m1, 0, movh -RET -%endif - %macro VP8_IDCT_DC_ADD 0 cglobal vp8_idct_dc_add, 3, 3, 6, dst, block, stride ; load data @@ -971,44 +926,6 @@ VP8_IDCT_DC_ADD ; void ff_vp8_idct_dc_add4y_(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride); ;- -%if ARCH_X86_32 -INIT_MMX mmx -cglobal vp8_idct_dc_add4y, 3, 3, 0, dst, block, stride -; load data -movd m0, [blockq+32*0] ; A -movd m1, [blockq+32*2] ; C -punpcklwd m0, [blockq+32*1] ; A B -punpcklwd m1, [blockq+32*3] ; C D -punpckldq m0, m1; A B C D -pxor m6, m6 - -; calculate DC -paddw m0, [pw_4] -movd [blockq+32*0], m6 -movd [blockq+32*1], m6 -movd [blockq+32*2], m6 -movd [blockq+32*3], m6 -psraw m0, 3 -psubw m6, m0 -packuswb m0, m0 -packuswb m6, m6 -punpcklbw m0, m0 ; AABBCCDD -punpcklbw m6, m6 ; AABBCCDD -movq m1, m0 -movq m7, m6 -punpcklbw m0, m0 ; -punpckhbw m1, m1 ; -punpcklbw m6, m6 ; -punpckhbw m7, m7 ; - -; add DC -DEFINE_ARGS dst1, dst2, stride -leadst2q, [dst1q+strideq*2] -ADD_DCm0, m6, 0, mova -ADD_DCm1, m7, 8, mova -RET -%endif - INIT_XMM sse2 cglobal vp8_idct_dc_add4y, 3, 3, 6, dst, block, stride ; load data @@ -1117,7 +1034,7 @@ cglobal vp8_idct_dc_add4uv, 3, 3, 0, dst, block, stride SWAP %4, %3 %endmacro -%macro VP8_IDCT_ADD 0 +INIT_MMX sse cglobal vp8_idct_add, 3, 3, 0, dst, block, stride ; load block data movq m0, [blockq+ 0] @@ -1126,17 +1043,9 @@ cglobal vp8_idct_add, 3, 3, 0, dst, block, stride movq m3, [blockq+24] movq m6, [pw_20091] movq m7, [pw_17734] -%if cpuflag(sse) xorps xmm0, xmm0 movaps [blockq+ 0], xmm0 movaps [blockq+16], xmm0 -%else -pxor m4, m4 -movq [blockq+ 0], m4 -movq [blockq+ 8], m4 -movq [blockq+16], m4 -movq [blockq+24], m4 -%endif ; actual IDCT VP8_IDCT_TRANSFORM4x4_1D 0, 1, 2, 3, 4, 5 @@ -1153,14 +1062,6 @@ cglobal vp8_idct_add, 3, 3, 0, dst, block, stride STORE_DIFFx2 m2, m3, m6, m7, m4, 3, dst2q, strideq RET -%endmacro - -%if ARCH_X86_32 -INIT_MMX mmx 
-VP8_IDCT_ADD -%endif -INIT_MMX sse -VP8_IDCT_ADD ;- ; void ff_vp8_luma_dc_wht(int16_t block[4][4][16], int16_t dc[16]) @@ -1193,23 +1094,15 @@ VP8_IDCT_ADD
[FFmpeg-cvslog] avcodec/x86/dirac_dwt: Remove obsolete MMX functions
ffmpeg | branch: master | Andreas Rheinhardt | Mon Jun 20 07:08:31 2022 +0200| [5e332fe35cd336a5c7718d5e9a5a93ece0e61a3a] | committer: Andreas Rheinhardt avcodec/x86/dirac_dwt: Remove obsolete MMX functions The only systems which benefit from these are truly ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5e332fe35cd336a5c7718d5e9a5a93ece0e61a3a --- libavcodec/x86/dirac_dwt.asm| 8 libavcodec/x86/dirac_dwt_init.c | 31 --- 2 files changed, 39 deletions(-) diff --git a/libavcodec/x86/dirac_dwt.asm b/libavcodec/x86/dirac_dwt.asm index 22a5c2..6c8b3c0d88 100644 --- a/libavcodec/x86/dirac_dwt.asm +++ b/libavcodec/x86/dirac_dwt.asm @@ -293,14 +293,6 @@ cglobal horizontal_compose_dd97i_ssse3, 3,6,8, b, tmp, w, x, w2, b_w2 REP_RET -%if ARCH_X86_64 == 0 -INIT_MMX -COMPOSE_VERTICAL mmx -HAAR_HORIZONTAL mmx, 0 -HAAR_HORIZONTAL mmx, 1 -%endif - -;;INIT_XMM INIT_XMM COMPOSE_VERTICAL sse2 HAAR_HORIZONTAL sse2, 0 diff --git a/libavcodec/x86/dirac_dwt_init.c b/libavcodec/x86/dirac_dwt_init.c index 49a6380add..9200618283 100644 --- a/libavcodec/x86/dirac_dwt_init.c +++ b/libavcodec/x86/dirac_dwt_init.c @@ -134,9 +134,6 @@ static void horizontal_compose_haar1i##ext(uint8_t *_b, uint8_t *_tmp, int w)\ \ #if HAVE_X86ASM -#if !ARCH_X86_64 -COMPOSE_VERTICAL(_mmx, 4) -#endif COMPOSE_VERTICAL(_sse2, 8) @@ -163,34 +160,6 @@ void ff_spatial_idwt_init_x86(DWTContext *d, enum dwt_type type) #if HAVE_X86ASM int mm_flags = av_get_cpu_flags(); -#if !ARCH_X86_64 -if (!(mm_flags & AV_CPU_FLAG_MMX)) -return; - -switch (type) { -case DWT_DIRAC_DD9_7: -d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx; -d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx; -break; -case DWT_DIRAC_LEGALL5_3: -d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx; -d->vertical_compose_h0 = 
(void*)vertical_compose_dirac53iH0_mmx; -break; -case DWT_DIRAC_DD13_7: -d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_mmx; -d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx; -break; -case DWT_DIRAC_HAAR0: -d->vertical_compose = (void*)vertical_compose_haar_mmx; -d->horizontal_compose = horizontal_compose_haar0i_mmx; -break; -case DWT_DIRAC_HAAR1: -d->vertical_compose = (void*)vertical_compose_haar_mmx; -d->horizontal_compose = horizontal_compose_haar1i_mmx; -break; -} -#endif - if (!(mm_flags & AV_CPU_FLAG_SSE2)) return; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/huffyuvencdsp: Remove obsolete MMX function
ffmpeg | branch: master | Andreas Rheinhardt | Mon Jun 20 07:14:47 2022 +0200| [839fbe0e98881f020e41dc7151d08f2ccb314398] | committer: Andreas Rheinhardt avcodec/x86/huffyuvencdsp: Remove obsolete MMX function The only systems which benefit from ff_diff_int16_mmx are truly ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=839fbe0e98881f020e41dc7151d08f2ccb314398 --- libavcodec/x86/huffyuvencdsp.asm| 9 - libavcodec/x86/huffyuvencdsp_init.c | 6 -- 2 files changed, 15 deletions(-) diff --git a/libavcodec/x86/huffyuvencdsp.asm b/libavcodec/x86/huffyuvencdsp.asm index d994fd0fd6..8bfd0face0 100644 --- a/libavcodec/x86/huffyuvencdsp.asm +++ b/libavcodec/x86/huffyuvencdsp.asm @@ -36,26 +36,17 @@ SECTION .text %macro DIFF_INT16 0 cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp -%if mmsize > 8 test src1q, mmsize-1 jnz .unaligned test src2q, mmsize-1 jnz .unaligned test dstq, mmsize-1 jnz .unaligned -%endif INT16_LOOP a, sub -%if mmsize > 8 .unaligned: INT16_LOOP u, sub -%endif %endmacro -%if ARCH_X86_32 -INIT_MMX mmx -DIFF_INT16 -%endif - INIT_XMM sse2 DIFF_INT16 diff --git a/libavcodec/x86/huffyuvencdsp_init.c b/libavcodec/x86/huffyuvencdsp_init.c index 6c6e068cf8..cc6dc5a560 100644 --- a/libavcodec/x86/huffyuvencdsp_init.c +++ b/libavcodec/x86/huffyuvencdsp_init.c @@ -28,8 +28,6 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/huffyuvencdsp.h" -void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, -unsigned mask, int w); void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w); void ff_diff_int16_avx2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, @@ -42,10 +40,6 @@ av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext * av_unused int cpu_flags = 
av_get_cpu_flags(); const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt); -if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { -c->diff_int16 = ff_diff_int16_mmx; -} - if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) { c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext; } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/lossless_videoencdsp: Remove obsolete MMX function
ffmpeg | branch: master | Andreas Rheinhardt | Mon Jun 20 07:20:40 2022 +0200| [230ea38de143368729ee1cce47b3a87fbafad8e4] | committer: Andreas Rheinhardt avcodec/x86/lossless_videoencdsp: Remove obsolete MMX function The only systems which benefit from ff_diff_bytes_mmx are truly ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=230ea38de143368729ee1cce47b3a87fbafad8e4 --- libavcodec/x86/lossless_videoencdsp.asm| 9 - libavcodec/x86/lossless_videoencdsp_init.c | 6 -- 2 files changed, 15 deletions(-) diff --git a/libavcodec/x86/lossless_videoencdsp.asm b/libavcodec/x86/lossless_videoencdsp.asm index fb1204f0f1..2e1d01bc2c 100644 --- a/libavcodec/x86/lossless_videoencdsp.asm +++ b/libavcodec/x86/lossless_videoencdsp.asm @@ -113,15 +113,6 @@ cglobal diff_bytes, 4,5,2, dst, src1, src2, w REP_RET %endmacro -%if ARCH_X86_32 -INIT_MMX mmx -DIFF_BYTES_PROLOGUE -%define regsize mmsize -DIFF_BYTES_LOOP_PREP .skip_main_aa, .end_aa -DIFF_BYTES_BODYa, a -%undef i -%endif - INIT_XMM sse2 DIFF_BYTES_PROLOGUE %define regsize mmsize diff --git a/libavcodec/x86/lossless_videoencdsp_init.c b/libavcodec/x86/lossless_videoencdsp_init.c index 40407add52..b3efcfdcd7 100644 --- a/libavcodec/x86/lossless_videoencdsp_init.c +++ b/libavcodec/x86/lossless_videoencdsp_init.c @@ -29,8 +29,6 @@ #include "libavcodec/lossless_videoencdsp.h" #include "libavcodec/mathops.h" -void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, - intptr_t w); void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, intptr_t w); void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, @@ -87,10 +85,6 @@ av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c) { av_unused int cpu_flags = av_get_cpu_flags(); -if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { 
-c->diff_bytes = ff_diff_bytes_mmx; -} - #if HAVE_INLINE_ASM if (INLINE_MMXEXT(cpu_flags)) { c->sub_median_pred = sub_median_pred_mmxext; ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] avcodec/x86/lossless_videodsp: Remove obsolete MMX(EXT) functions
ffmpeg | branch: master | Andreas Rheinhardt | Mon Jun 20 07:31:42 2022 +0200| [fed07efcde72824ac1ada80d4af4e91ac4fcfc14] | committer: Andreas Rheinhardt avcodec/x86/lossless_videodsp: Remove obsolete MMX(EXT) functions The only systems which benefit from these are truly ancient 32bit x86s as all other systems use at least the SSE2 versions (this includes all x64 cpus (which is why this code is restricted to x86-32)). Signed-off-by: Andreas Rheinhardt > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fed07efcde72824ac1ada80d4af4e91ac4fcfc14 --- libavcodec/x86/lossless_videodsp.asm| 20 +++- libavcodec/x86/lossless_videodsp_init.c | 58 - 2 files changed, 4 insertions(+), 74 deletions(-) diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm index 0a1b7091c9..eb1b80506e 100644 --- a/libavcodec/x86/lossless_videodsp.asm +++ b/libavcodec/x86/lossless_videodsp.asm @@ -38,11 +38,11 @@ pb_67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7 SECTION .text ;-- -; void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top, -;const uint8_t *diff, int w, -;int *left, int *left_top) +; void ff_add_median_pred(uint8_t *dst, const uint8_t *top, +; const uint8_t *diff, int w, +; int *left, int *left_top) ;-- -%macro MEDIAN_PRED 0 +INIT_XMM sse2 cglobal add_median_pred, 6,6,8, dst, top, diff, w, left, left_top movum0, [topq] movam2, m0 @@ -100,14 +100,6 @@ cglobal add_median_pred, 6,6,8, dst, top, diff, w, left, left_top movzx r2d, byte [topq-1] mov [left_topq], r2d RET -%endmacro - -%if ARCH_X86_32 -INIT_MMX mmxext -MEDIAN_PRED -%endif -INIT_XMM sse2 -MEDIAN_PRED %macro ADD_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned @@ -240,10 +232,6 @@ cglobal add_bytes, 3,4,2, dst, src, w, size REP_RET %endmacro -%if ARCH_X86_32 -INIT_MMX mmx -ADD_BYTES -%endif INIT_XMM sse2 ADD_BYTES diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c index 6d71f14e7f..5690cacaad 100644 --- 
a/libavcodec/x86/lossless_videodsp_init.c +++ b/libavcodec/x86/lossless_videodsp_init.c @@ -19,17 +19,12 @@ */ #include "config.h" -#include "libavutil/x86/asm.h" #include "../lossless_videodsp.h" #include "libavutil/x86/cpu.h" -void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t w); void ff_add_bytes_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t w); void ff_add_bytes_avx2(uint8_t *dst, uint8_t *src, ptrdiff_t w); -void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top, - const uint8_t *diff, ptrdiff_t w, - int *left, int *left_top); void ff_add_median_pred_sse2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, ptrdiff_t w, int *left, int *left_top); @@ -47,63 +42,10 @@ int ff_add_left_pred_int16_unaligned_ssse3(uint16_t *dst, const uint16_t *src, u void ff_add_gradient_pred_ssse3(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width); void ff_add_gradient_pred_avx2(uint8_t *src, const ptrdiff_t stride, const ptrdiff_t width); -#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32 -static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top, - const uint8_t *diff, ptrdiff_t w, - int *left, int *left_top) -{ -x86_reg w2 = -w; -x86_reg x; -int l = *left & 0xff; -int tl = *left_top & 0xff; -int t; -__asm__ volatile ( -"mov %7, %3\n" -"1: \n" -"movzbl (%3, %4), %2\n" -"mov %2, %k3 \n" -"sub %b1, %b3 \n" -"add %b0, %b3 \n" -"mov %2, %1\n" -"cmp %0, %2\n" -"cmovg%0, %2\n" -"cmovg%1, %0\n" -"cmp %k3, %0\n" -"cmovg %k3, %0\n" -"mov %7, %3\n" -"cmp %2, %0\n" -"cmovl%2, %0\n" -"add(%6, %4), %b0 \n" -"mov %b0, (%5, %4) \n" -"inc %4\n" -"jl 1b\n" -: "+&q"(l), "+&q"(tl), "=&r"(t), "=&q"(x), "+&r"(w2) -: "r"(dst + w), "r"(diff + w), "rm"(top + w) -); -*left = l; -*left_top = tl; -} -#endif - void ff_llviddsp_init_x86(LLVidDSPContext *c) { int cpu_flags = av_get_cpu_flags();