On 11/8/22, Andreas Rheinhardt <andreas.rheinha...@outlook.com> wrote: > Andreas Rheinhardt: >> postprocess.c currently has C, MMX, MMXEXT, 3DNow as well as >> SSE2 versions of its internal functions. But given that only >> ancient 32-bit x86 CPUs don't support SSE2, the MMX, MMXEXT >> and 3DNow versions are obsolete and are therefore removed by >> this commit. This saves about 56KB here. >> >> (The SSE2 version in particular is not really complete, >> so that it often falls back to MMXEXT (which means that >> there were some identical (apart from the name) MMXEXT >> and SSE2 functions); this duplication no longer exists >> with this commit.) >> >> Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> >> --- >> The PP_CPU_CAPS_MMX(2)/3DNOW could now be deprecated. >> >> libpostproc/postprocess.c | 69 ++-- >> libpostproc/postprocess_template.c | 521 ++--------------------------- >> 2 files changed, 57 insertions(+), 533 deletions(-) >> >> diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c >> index 383c691cb4..0586e458b4 100644 >> --- a/libpostproc/postprocess.c >> +++ b/libpostproc/postprocess.c >> @@ -26,28 +26,27 @@ >> */ >> >> /* >> - C MMX MMX2 3DNow AltiVec >> -isVertDC Ec Ec Ec >> -isVertMinMaxOk Ec Ec Ec >> -doVertLowPass E e e Ec >> -doVertDefFilter Ec Ec e e Ec >> -isHorizDC Ec Ec Ec >> -isHorizMinMaxOk a E Ec >> -doHorizLowPass E e e Ec >> -doHorizDefFilter Ec Ec e e Ec >> -do_a_deblock Ec E Ec E >> -deRing E e e* Ecp >> -Vertical RKAlgo1 E a a >> -Horizontal RKAlgo1 a a >> -Vertical X1# a E E >> -Horizontal X1# a E E >> -LinIpolDeinterlace e E E* >> -CubicIpolDeinterlace a e e* >> -LinBlendDeinterlace e E E* >> + C MMX MMX2 AltiVec >> +isVertDC Ec Ec Ec >> +isVertMinMaxOk Ec Ec Ec >> +doVertLowPass E e Ec >> +doVertDefFilter Ec Ec e Ec >> +isHorizDC Ec Ec Ec >> +isHorizMinMaxOk a E Ec >> +doHorizLowPass E e Ec >> +doHorizDefFilter Ec Ec e Ec >> +do_a_deblock Ec E Ec >> +deRing E e Ecp >> +Vertical RKAlgo1 E a >> +Horizontal 
RKAlgo1 a >> +Vertical X1# a E >> +Horizontal X1# a E >> +LinIpolDeinterlace e E >> +CubicIpolDeinterlace a e >> +LinBlendDeinterlace e E >> MedianDeinterlace# E Ec Ec >> -TempDeNoiser# E e e Ec >> +TempDeNoiser# E e Ec >> >> -* I do not have a 3DNow! CPU -> it is untested, but no one said it does >> not work so it seems to work >> # more or less selfinvented filters so the exactness is not too >> meaningful >> E = Exact implementation >> e = almost exact implementation (slightly different rounding,...) >> @@ -83,7 +82,6 @@ try to unroll inner for(x=0 ... loop to avoid these damn >> if(x ... checks >> #include <stdlib.h> >> #include <string.h> >> //#undef HAVE_MMXEXT_INLINE >> -//#define HAVE_AMD3DNOW_INLINE >> //#undef HAVE_MMX_INLINE >> //#undef ARCH_X86 >> //#define DEBUG_BRIGHTNESS >> @@ -494,7 +492,7 @@ static av_always_inline void do_a_deblock_C(uint8_t >> *src, int step, >> } >> } >> >> -//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one >> +//Note: we have C and SSE2 version (which uses MMX(EXT) when >> advantageous) >> //Plain C versions >> //we always compile C for testing which needs bitexactness >> #define TEMPLATE_PP_C 1 >> @@ -508,27 +506,12 @@ static av_always_inline void do_a_deblock_C(uint8_t >> *src, int step, >> >> #if ARCH_X86 && HAVE_INLINE_ASM >> # if CONFIG_RUNTIME_CPUDETECT >> -# define TEMPLATE_PP_MMX 1 >> -# include "postprocess_template.c" >> -# define TEMPLATE_PP_MMXEXT 1 >> -# include "postprocess_template.c" >> -# define TEMPLATE_PP_3DNOW 1 >> -# include "postprocess_template.c" >> # define TEMPLATE_PP_SSE2 1 >> # include "postprocess_template.c" >> # else >> # if HAVE_SSE2_INLINE >> # define TEMPLATE_PP_SSE2 1 >> # include "postprocess_template.c" >> -# elif HAVE_MMXEXT_INLINE >> -# define TEMPLATE_PP_MMXEXT 1 >> -# include "postprocess_template.c" >> -# elif HAVE_AMD3DNOW_INLINE >> -# define TEMPLATE_PP_3DNOW 1 >> -# include "postprocess_template.c" >> -# elif HAVE_MMX_INLINE >> -# define TEMPLATE_PP_MMX 1 >> -# 
include "postprocess_template.c" >> # endif >> # endif >> #endif >> @@ -549,21 +532,12 @@ static inline void postProcess(const uint8_t src[], >> int srcStride, uint8_t dst[] >> #if ARCH_X86 && HAVE_INLINE_ASM >> // ordered per speed fastest first >> if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = >> postProcess_SSE2; >> - else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = >> postProcess_MMX2; >> - else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = >> postProcess_3DNow; >> - else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = >> postProcess_MMX; >> #elif HAVE_ALTIVEC >> if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = >> postProcess_altivec; >> #endif >> #else /* CONFIG_RUNTIME_CPUDETECT */ >> #if HAVE_SSE2_INLINE >> pp = postProcess_SSE2; >> -#elif HAVE_MMXEXT_INLINE >> - pp = postProcess_MMX2; >> -#elif HAVE_AMD3DNOW_INLINE >> - pp = postProcess_3DNow; >> -#elif HAVE_MMX_INLINE >> - pp = postProcess_MMX; >> #elif HAVE_ALTIVEC >> pp = postProcess_altivec; >> #endif >> @@ -877,9 +851,6 @@ av_cold pp_context *pp_get_context(int width, int >> height, int cpuCaps){ >> c->cpuCaps = av_get_cpu_flags(); >> } else { >> c->cpuCaps = 0; >> - if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= >> AV_CPU_FLAG_MMX; >> - if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= >> AV_CPU_FLAG_MMXEXT; >> - if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= >> AV_CPU_FLAG_3DNOW; >> if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= >> AV_CPU_FLAG_ALTIVEC; >> } >> >> diff --git a/libpostproc/postprocess_template.c >> b/libpostproc/postprocess_template.c >> index 9f76b7c587..bcf7bdad66 100644 >> --- a/libpostproc/postprocess_template.c >> +++ b/libpostproc/postprocess_template.c >> @@ -20,7 +20,7 @@ >> >> /** >> * @file >> - * mmx/mmx2/3dnow postprocess code. >> + * mmx/mmx2/sse2 postprocess code. 
>> */ >> >> #include "libavutil/mem_internal.h" >> @@ -57,14 +57,6 @@ >> # define TEMPLATE_PP_MMXEXT 0 >> #endif >> >> -#ifdef TEMPLATE_PP_3DNOW >> -# undef TEMPLATE_PP_MMX >> -# define TEMPLATE_PP_MMX 1 >> -# define RENAME(a) a ## _3DNow >> -#else >> -# define TEMPLATE_PP_3DNOW 0 >> -#endif >> - >> #ifdef TEMPLATE_PP_SSE2 >> # undef TEMPLATE_PP_MMX >> # define TEMPLATE_PP_MMX 1 >> @@ -82,30 +74,19 @@ >> >> #if TEMPLATE_PP_MMXEXT >> #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" >> -#elif TEMPLATE_PP_3DNOW >> -#define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" >> #endif >> #define PAVGB(a,b) REAL_PAVGB(a,b) >> >> #if TEMPLATE_PP_MMXEXT >> #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" >> -#elif TEMPLATE_PP_MMX >> -#define PMINUB(b,a,t) \ >> - "movq " #a ", " #t " \n\t"\ >> - "psubusb " #b ", " #t " \n\t"\ >> - "psubb " #t ", " #a " \n\t" >> #endif >> >> #if TEMPLATE_PP_MMXEXT >> #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t" >> -#elif TEMPLATE_PP_MMX >> -#define PMAXUB(a,b) \ >> - "psubusb " #a ", " #b " \n\t"\ >> - "paddb " #a ", " #b " \n\t" >> #endif >> >> //FIXME? |255-0| = 1 (should not be a problem ...) >> -#if TEMPLATE_PP_MMX >> +#if TEMPLATE_PP_MMXEXT >> /** >> * Check if the middle 8x8 Block in the given 8x16 block is flat >> */ >> @@ -185,20 +166,8 @@ static inline int RENAME(vertClassify)(const uint8_t >> src[], int stride, PPContex >> "psubusb %%mm3, %%mm4 \n\t" >> >> " \n\t" >> -#if TEMPLATE_PP_MMXEXT >> "pxor %%mm7, %%mm7 \n\t" >> "psadbw %%mm7, %%mm0 \n\t" >> -#else >> - "movq %%mm0, %%mm1 \n\t" >> - "psrlw $8, %%mm0 \n\t" >> - "paddb %%mm1, %%mm0 \n\t" >> - "movq %%mm0, %%mm1 \n\t" >> - "psrlq $16, %%mm0 \n\t" >> - "paddb %%mm1, %%mm0 \n\t" >> - "movq %%mm0, %%mm1 \n\t" >> - "psrlq $32, %%mm0 \n\t" >> - "paddb %%mm1, %%mm0 \n\t" >> -#endif >> "movq %4, %%mm7 \n\t" // QP,..., QP >> "paddusb %%mm7, %%mm7 \n\t" // 2QP ... 
2QP >> "psubusb %%mm7, %%mm4 \n\t" // Diff <= 2QP -> >> 0 >> @@ -219,7 +188,7 @@ static inline int RENAME(vertClassify)(const uint8_t >> src[], int stride, PPContex >> return 2; >> } >> } >> -#endif //TEMPLATE_PP_MMX >> +#endif //TEMPLATE_PP_MMXEXT >> >> /** >> * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 >> block in the middle) >> @@ -228,7 +197,7 @@ static inline int RENAME(vertClassify)(const uint8_t >> src[], int stride, PPContex >> #if !TEMPLATE_PP_ALTIVEC >> static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, >> PPContext *c) >> { >> -#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#if TEMPLATE_PP_MMXEXT >> src+= stride*3; >> __asm__ volatile( //"movv %0 %1 %2\n\t" >> "movq %2, %%mm0 \n\t" // QP,..., QP >> @@ -355,7 +324,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, >> int stride, PPContext *c) >> : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) >> : "%"FF_REG_a, "%"FF_REG_c >> ); >> -#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#else //TEMPLATE_PP_MMXEXT >> const int l1= stride; >> const int l2= stride + l1; >> const int l3= stride + l2; >> @@ -394,7 +363,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, >> int stride, PPContext *c) >> >> src++; >> } >> -#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#endif //TEMPLATE_PP_MMXEXT >> } >> #endif //TEMPLATE_PP_ALTIVEC >> >> @@ -407,7 +376,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, >> int stride, PPContext *c) >> */ >> static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, >> PPContext *co) >> { >> -#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#if TEMPLATE_PP_MMXEXT >> src+= stride*3; >> >> __asm__ volatile( >> @@ -494,7 +463,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, >> int stride, PPContext *co) >> NAMED_CONSTRAINTS_ADD(b01) >> : "%"FF_REG_a, "%"FF_REG_c >> ); >> -#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#else //TEMPLATE_PP_MMXEXT >> >> const int l1= stride; >> const int l2= 
stride + l1; >> @@ -528,13 +497,13 @@ static inline void RENAME(vertX1Filter)(uint8_t >> *src, int stride, PPContext *co) >> } >> src++; >> } >> -#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#endif //TEMPLATE_PP_MMXEXT >> } >> >> #if !TEMPLATE_PP_ALTIVEC >> static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, >> PPContext *c) >> { >> -#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#if TEMPLATE_PP_MMXEXT >> /* >> uint8_t tmp[16]; >> const int l1= stride; >> @@ -816,239 +785,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t >> src[], int stride, PPContext >> } >> } >> */ >> -#elif TEMPLATE_PP_MMX >> - DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte >> vars >> - src+= stride*4; >> - __asm__ volatile( >> - "pxor %%mm7, %%mm7 \n\t" >> -// 0 1 2 3 4 5 6 7 >> -// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 edx+%1 edx+2%1 >> -// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 >> - >> - "movq (%0), %%mm0 \n\t" >> - "movq %%mm0, %%mm1 \n\t" >> - "punpcklbw %%mm7, %%mm0 \n\t" // low part of line >> 0 >> - "punpckhbw %%mm7, %%mm1 \n\t" // high part of >> line 0 >> - >> - "movq (%0, %1), %%mm2 \n\t" >> - "lea (%0, %1, 2), %%"FF_REG_a" \n\t" >> - "movq %%mm2, %%mm3 \n\t" >> - "punpcklbw %%mm7, %%mm2 \n\t" // low part of line >> 1 >> - "punpckhbw %%mm7, %%mm3 \n\t" // high part of >> line 1 >> - >> - "movq (%%"FF_REG_a"), %%mm4 \n\t" >> - "movq %%mm4, %%mm5 \n\t" >> - "punpcklbw %%mm7, %%mm4 \n\t" // low part of line >> 2 >> - "punpckhbw %%mm7, %%mm5 \n\t" // high part of >> line 2 >> - >> - "paddw %%mm0, %%mm0 \n\t" // 2L0 >> - "paddw %%mm1, %%mm1 \n\t" // 2H0 >> - "psubw %%mm4, %%mm2 \n\t" // L1 - L2 >> - "psubw %%mm5, %%mm3 \n\t" // H1 - H2 >> - "psubw %%mm2, %%mm0 \n\t" // 2L0 - L1 + L2 >> - "psubw %%mm3, %%mm1 \n\t" // 2H0 - H1 + H2 >> - >> - "psllw $2, %%mm2 \n\t" // 4L1 - 4L2 >> - "psllw $2, %%mm3 \n\t" // 4H1 - 4H2 >> - "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 >> - "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 >> - >> - "movq 
(%%"FF_REG_a", %1), %%mm2 \n\t" >> - "movq %%mm2, %%mm3 \n\t" >> - "punpcklbw %%mm7, %%mm2 \n\t" // L3 >> - "punpckhbw %%mm7, %%mm3 \n\t" // H3 >> - >> - "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 >> - L3 >> - "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 >> - H3 >> - "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 >> - 2L3 >> - "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 >> - 2H3 >> - "movq %%mm0, (%3) \n\t" // 2L0 - 5L1 + 5L2 >> - 2L3 >> - "movq %%mm1, 8(%3) \n\t" // 2H0 - 5H1 + 5H2 >> - 2H3 >> - >> - "movq (%%"FF_REG_a", %1, 2), %%mm0 \n\t" >> - "movq %%mm0, %%mm1 \n\t" >> - "punpcklbw %%mm7, %%mm0 \n\t" // L4 >> - "punpckhbw %%mm7, %%mm1 \n\t" // H4 >> - >> - "psubw %%mm0, %%mm2 \n\t" // L3 - L4 >> - "psubw %%mm1, %%mm3 \n\t" // H3 - H4 >> - "movq %%mm2, 16(%3) \n\t" // L3 - L4 >> - "movq %%mm3, 24(%3) \n\t" // H3 - H4 >> - "paddw %%mm4, %%mm4 \n\t" // 2L2 >> - "paddw %%mm5, %%mm5 \n\t" // 2H2 >> - "psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4 >> - "psubw %%mm3, %%mm5 \n\t" // 2H2 - H3 + H4 >> - >> - "lea (%%"FF_REG_a", %1), %0 \n\t" >> - "psllw $2, %%mm2 \n\t" // 4L3 - 4L4 >> - "psllw $2, %%mm3 \n\t" // 4H3 - 4H4 >> - "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 >> - "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 >> -//50 opcodes so far >> - "movq (%0, %1, 2), %%mm2 \n\t" >> - "movq %%mm2, %%mm3 \n\t" >> - "punpcklbw %%mm7, %%mm2 \n\t" // L5 >> - "punpckhbw %%mm7, %%mm3 \n\t" // H5 >> - "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 >> - L5 >> - "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 >> - H5 >> - "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 >> - 2L5 >> - "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 >> - 2H5 >> - >> - "movq (%%"FF_REG_a", %1, 4), %%mm6 \n\t" >> - "punpcklbw %%mm7, %%mm6 \n\t" // L6 >> - "psubw %%mm6, %%mm2 \n\t" // L5 - L6 >> - "movq (%%"FF_REG_a", %1, 4), %%mm6 \n\t" >> - "punpckhbw %%mm7, %%mm6 \n\t" // H6 >> - "psubw %%mm6, %%mm3 \n\t" // H5 - H6 >> - >> - "paddw %%mm0, %%mm0 \n\t" // 2L4 >> - "paddw %%mm1, %%mm1 \n\t" // 2H4 >> - "psubw 
%%mm2, %%mm0 \n\t" // 2L4 - L5 + L6 >> - "psubw %%mm3, %%mm1 \n\t" // 2H4 - H5 + H6 >> - >> - "psllw $2, %%mm2 \n\t" // 4L5 - 4L6 >> - "psllw $2, %%mm3 \n\t" // 4H5 - 4H6 >> - "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 >> - "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 >> - >> - "movq (%0, %1, 4), %%mm2 \n\t" >> - "movq %%mm2, %%mm3 \n\t" >> - "punpcklbw %%mm7, %%mm2 \n\t" // L7 >> - "punpckhbw %%mm7, %%mm3 \n\t" // H7 >> - >> - "paddw %%mm2, %%mm2 \n\t" // 2L7 >> - "paddw %%mm3, %%mm3 \n\t" // 2H7 >> - "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 >> - 2L7 >> - "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 >> - 2H7 >> - >> - "movq (%3), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 >> - 2L3 >> - "movq 8(%3), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 >> - 2H3 >> - >> -#if TEMPLATE_PP_MMXEXT >> - "movq %%mm7, %%mm6 \n\t" // 0 >> - "psubw %%mm0, %%mm6 \n\t" >> - "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 >> - 2L7| >> - "movq %%mm7, %%mm6 \n\t" // 0 >> - "psubw %%mm1, %%mm6 \n\t" >> - "pmaxsw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 >> - 2H7| >> - "movq %%mm7, %%mm6 \n\t" // 0 >> - "psubw %%mm2, %%mm6 \n\t" >> - "pmaxsw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 >> - 2L3| >> - "movq %%mm7, %%mm6 \n\t" // 0 >> - "psubw %%mm3, %%mm6 \n\t" >> - "pmaxsw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 >> - 2H3| >> -#else >> - "movq %%mm7, %%mm6 \n\t" // 0 >> - "pcmpgtw %%mm0, %%mm6 \n\t" >> - "pxor %%mm6, %%mm0 \n\t" >> - "psubw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 >> - 2L7| >> - "movq %%mm7, %%mm6 \n\t" // 0 >> - "pcmpgtw %%mm1, %%mm6 \n\t" >> - "pxor %%mm6, %%mm1 \n\t" >> - "psubw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 >> - 2H7| >> - "movq %%mm7, %%mm6 \n\t" // 0 >> - "pcmpgtw %%mm2, %%mm6 \n\t" >> - "pxor %%mm6, %%mm2 \n\t" >> - "psubw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 >> - 2L3| >> - "movq %%mm7, %%mm6 \n\t" // 0 >> - "pcmpgtw %%mm3, %%mm6 \n\t" >> - "pxor %%mm6, %%mm3 \n\t" >> - "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 >> - 2H3| >> -#endif >> - >> -#if TEMPLATE_PP_MMXEXT >> - "pminsw %%mm2, 
%%mm0 \n\t" >> - "pminsw %%mm3, %%mm1 \n\t" >> -#else >> - "movq %%mm0, %%mm6 \n\t" >> - "psubusw %%mm2, %%mm6 \n\t" >> - "psubw %%mm6, %%mm0 \n\t" >> - "movq %%mm1, %%mm6 \n\t" >> - "psubusw %%mm3, %%mm6 \n\t" >> - "psubw %%mm6, %%mm1 \n\t" >> -#endif >> - >> - "movd %2, %%mm2 \n\t" // QP >> - "punpcklbw %%mm7, %%mm2 \n\t" >> - >> - "movq %%mm7, %%mm6 \n\t" // 0 >> - "pcmpgtw %%mm4, %%mm6 \n\t" // sign(2L2 - 5L3 + >> 5L4 - 2L5) >> - "pxor %%mm6, %%mm4 \n\t" >> - "psubw %%mm6, %%mm4 \n\t" // |2L2 - 5L3 + 5L4 >> - 2L5| >> - "pcmpgtw %%mm5, %%mm7 \n\t" // sign(2H2 - 5H3 + >> 5H4 - 2H5) >> - "pxor %%mm7, %%mm5 \n\t" >> - "psubw %%mm7, %%mm5 \n\t" // |2H2 - 5H3 + 5H4 >> - 2H5| >> -// 100 opcodes >> - "psllw $3, %%mm2 \n\t" // 8QP >> - "movq %%mm2, %%mm3 \n\t" // 8QP >> - "pcmpgtw %%mm4, %%mm2 \n\t" >> - "pcmpgtw %%mm5, %%mm3 \n\t" >> - "pand %%mm2, %%mm4 \n\t" >> - "pand %%mm3, %%mm5 \n\t" >> - >> - >> - "psubusw %%mm0, %%mm4 \n\t" // hd >> - "psubusw %%mm1, %%mm5 \n\t" // ld >> - >> - >> - "movq "MANGLE(w05)", %%mm2 \n\t" // 5 >> - "pmullw %%mm2, %%mm4 \n\t" >> - "pmullw %%mm2, %%mm5 \n\t" >> - "movq "MANGLE(w20)", %%mm2 \n\t" // 32 >> - "paddw %%mm2, %%mm4 \n\t" >> - "paddw %%mm2, %%mm5 \n\t" >> - "psrlw $6, %%mm4 \n\t" >> - "psrlw $6, %%mm5 \n\t" >> - >> - "movq 16(%3), %%mm0 \n\t" // L3 - L4 >> - "movq 24(%3), %%mm1 \n\t" // H3 - H4 >> - >> - "pxor %%mm2, %%mm2 \n\t" >> - "pxor %%mm3, %%mm3 \n\t" >> - >> - "pcmpgtw %%mm0, %%mm2 \n\t" // sign (L3-L4) >> - "pcmpgtw %%mm1, %%mm3 \n\t" // sign (H3-H4) >> - "pxor %%mm2, %%mm0 \n\t" >> - "pxor %%mm3, %%mm1 \n\t" >> - "psubw %%mm2, %%mm0 \n\t" // |L3-L4| >> - "psubw %%mm3, %%mm1 \n\t" // |H3-H4| >> - "psrlw $1, %%mm0 \n\t" // |L3 - L4|/2 >> - "psrlw $1, %%mm1 \n\t" // |H3 - H4|/2 >> - >> - "pxor %%mm6, %%mm2 \n\t" >> - "pxor %%mm7, %%mm3 \n\t" >> - "pand %%mm2, %%mm4 \n\t" >> - "pand %%mm3, %%mm5 \n\t" >> - >> -#if TEMPLATE_PP_MMXEXT >> - "pminsw %%mm0, %%mm4 \n\t" >> - "pminsw %%mm1, %%mm5 \n\t" >> -#else >> - "movq 
%%mm4, %%mm2 \n\t" >> - "psubusw %%mm0, %%mm2 \n\t" >> - "psubw %%mm2, %%mm4 \n\t" >> - "movq %%mm5, %%mm2 \n\t" >> - "psubusw %%mm1, %%mm2 \n\t" >> - "psubw %%mm2, %%mm5 \n\t" >> -#endif >> - "pxor %%mm6, %%mm4 \n\t" >> - "pxor %%mm7, %%mm5 \n\t" >> - "psubw %%mm6, %%mm4 \n\t" >> - "psubw %%mm7, %%mm5 \n\t" >> - "packsswb %%mm5, %%mm4 \n\t" >> - "movq (%0), %%mm0 \n\t" >> - "paddb %%mm4, %%mm0 \n\t" >> - "movq %%mm0, (%0) \n\t" >> - "movq (%0, %1), %%mm0 \n\t" >> - "psubb %%mm4, %%mm0 \n\t" >> - "movq %%mm0, (%0, %1) \n\t" >> - >> - : "+r" (src) >> - : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp) >> - NAMED_CONSTRAINTS_ADD(w05,w20) >> - : "%"FF_REG_a >> - ); >> -#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#else //TEMPLATE_PP_MMXEXT >> const int l1= stride; >> const int l2= stride + l1; >> const int l3= stride + l2; >> @@ -1086,14 +823,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t >> src[], int stride, PPContext >> } >> src++; >> } >> -#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#endif //TEMPLATE_PP_MMXEXT >> } >> #endif //TEMPLATE_PP_ALTIVEC >> >> #if !TEMPLATE_PP_ALTIVEC >> static inline void RENAME(dering)(uint8_t src[], int stride, PPContext >> *c) >> { >> -#if HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) >> +#if HAVE_7REGS && TEMPLATE_PP_MMXEXT >> DECLARE_ALIGNED(8, uint64_t, tmp)[3]; >> __asm__ volatile( >> "pxor %%mm6, %%mm6 \n\t" >> @@ -1113,20 +850,10 @@ static inline void RENAME(dering)(uint8_t src[], >> int stride, PPContext *c) >> >> #undef REAL_FIND_MIN_MAX >> #undef FIND_MIN_MAX >> -#if TEMPLATE_PP_MMXEXT >> #define REAL_FIND_MIN_MAX(addr)\ >> "movq " #addr ", %%mm0 \n\t"\ >> "pminub %%mm0, %%mm7 \n\t"\ >> "pmaxub %%mm0, %%mm6 \n\t" >> -#else >> -#define REAL_FIND_MIN_MAX(addr)\ >> - "movq " #addr ", %%mm0 \n\t"\ >> - "movq %%mm7, %%mm1 \n\t"\ >> - "psubusb %%mm0, %%mm6 \n\t"\ >> - "paddb %%mm0, %%mm6 \n\t"\ >> - "psubusb %%mm0, %%mm1 \n\t"\ >> - "psubb %%mm1, %%mm7 \n\t" >> -#endif >> #define 
FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr) >> >> FIND_MIN_MAX((%%FF_REGa)) >> @@ -1140,49 +867,20 @@ FIND_MIN_MAX((%0, %1, 8)) >> >> "movq %%mm7, %%mm4 \n\t" >> "psrlq $8, %%mm7 \n\t" >> -#if TEMPLATE_PP_MMXEXT >> "pminub %%mm4, %%mm7 \n\t" // min of pixels >> "pshufw $0xF9, %%mm7, %%mm4 \n\t" >> "pminub %%mm4, %%mm7 \n\t" // min of pixels >> "pshufw $0xFE, %%mm7, %%mm4 \n\t" >> "pminub %%mm4, %%mm7 \n\t" >> -#else >> - "movq %%mm7, %%mm1 \n\t" >> - "psubusb %%mm4, %%mm1 \n\t" >> - "psubb %%mm1, %%mm7 \n\t" >> - "movq %%mm7, %%mm4 \n\t" >> - "psrlq $16, %%mm7 \n\t" >> - "movq %%mm7, %%mm1 \n\t" >> - "psubusb %%mm4, %%mm1 \n\t" >> - "psubb %%mm1, %%mm7 \n\t" >> - "movq %%mm7, %%mm4 \n\t" >> - "psrlq $32, %%mm7 \n\t" >> - "movq %%mm7, %%mm1 \n\t" >> - "psubusb %%mm4, %%mm1 \n\t" >> - "psubb %%mm1, %%mm7 \n\t" >> -#endif >> >> >> "movq %%mm6, %%mm4 \n\t" >> "psrlq $8, %%mm6 \n\t" >> -#if TEMPLATE_PP_MMXEXT >> "pmaxub %%mm4, %%mm6 \n\t" // max of pixels >> "pshufw $0xF9, %%mm6, %%mm4 \n\t" >> "pmaxub %%mm4, %%mm6 \n\t" >> "pshufw $0xFE, %%mm6, %%mm4 \n\t" >> "pmaxub %%mm4, %%mm6 \n\t" >> -#else >> - "psubusb %%mm4, %%mm6 \n\t" >> - "paddb %%mm4, %%mm6 \n\t" >> - "movq %%mm6, %%mm4 \n\t" >> - "psrlq $16, %%mm6 \n\t" >> - "psubusb %%mm4, %%mm6 \n\t" >> - "paddb %%mm4, %%mm6 \n\t" >> - "movq %%mm6, %%mm4 \n\t" >> - "psrlq $32, %%mm6 \n\t" >> - "psubusb %%mm4, %%mm6 \n\t" >> - "paddb %%mm4, %%mm6 \n\t" >> -#endif >> "movq %%mm6, %%mm0 \n\t" // max >> "psubb %%mm7, %%mm6 \n\t" // max - min >> "push %%"FF_REG_a" \n\t" >> @@ -1320,7 +1018,7 @@ DERING_CORE((%0, %1, 8) ,(%%FF_REGd, %1, >> 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5, >> NAMED_CONSTRAINTS_ADD(deringThreshold,b00,b02,b08) >> : "%"FF_REG_a, "%"FF_REG_d >> ); >> -#else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) >> +#else // HAVE_7REGS && TEMPLATE_PP_MMXEXT >> int y; >> int min=255; >> int max=0; >> @@ -1438,7 +1136,7 @@ DERING_CORE((%0, %1, 8) ,(%%FF_REGd, %1, >> 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5, >> // src[0] = 
src[7]=src[stride*7]=src[stride*7 + 7]=255; >> } >> #endif >> -#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#endif //TEMPLATE_PP_MMXEXT >> } >> #endif //TEMPLATE_PP_ALTIVEC >> >> @@ -1450,7 +1148,7 @@ DERING_CORE((%0, %1, 8) ,(%%FF_REGd, %1, >> 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5, >> */ >> static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], >> int stride) >> { >> -#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#if TEMPLATE_PP_MMXEXT >> src+= 4*stride; >> __asm__ volatile( >> "lea (%0, %1), %%"FF_REG_a" \n\t" >> @@ -1503,14 +1201,13 @@ static inline void >> RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid >> */ >> static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int >> stride) >> { >> -#if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#if TEMPLATE_PP_SSE2 >> src+= stride*3; >> __asm__ volatile( >> "lea (%0, %1), %%"FF_REG_a" \n\t" >> "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t" >> "lea (%%"FF_REG_d", %1, 4), %%"FF_REG_c"\n\t" >> "add %1, %%"FF_REG_c" \n\t" >> -#if TEMPLATE_PP_SSE2 >> "pxor %%xmm7, %%xmm7 \n\t" >> #define REAL_DEINT_CUBIC(a,b,c,d,e)\ >> "movq " #a ", %%xmm0 \n\t"\ >> @@ -1526,33 +1223,6 @@ static inline void >> RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride >> "psubw %%xmm0, %%xmm1 \n\t"\ >> "packuswb %%xmm1, %%xmm1 \n\t"\ >> "movlps %%xmm1, " #c " \n\t" >> -#else //TEMPLATE_PP_SSE2 >> - "pxor %%mm7, %%mm7 \n\t" >> -// 0 1 2 3 4 5 6 7 8 >> 9 10 >> -// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 >> %0+8%1 edx+4%1 ecx >> - >> -#define REAL_DEINT_CUBIC(a,b,c,d,e)\ >> - "movq " #a ", %%mm0 \n\t"\ >> - "movq " #b ", %%mm1 \n\t"\ >> - "movq " #d ", %%mm2 \n\t"\ >> - "movq " #e ", %%mm3 \n\t"\ >> - PAVGB(%%mm2, %%mm1) /* (b+d) /2 */\ >> - PAVGB(%%mm3, %%mm0) /* (a+e) /2 */\ >> - "movq %%mm0, %%mm2 \n\t"\ >> - "punpcklbw %%mm7, %%mm0 \n\t"\ >> - "punpckhbw %%mm7, %%mm2 \n\t"\ >> - "movq %%mm1, %%mm3 \n\t"\ >> - "punpcklbw %%mm7, %%mm1 \n\t"\ >> - "punpckhbw 
%%mm7, %%mm3 \n\t"\ >> - "psubw %%mm1, %%mm0 \n\t" /* L(a+e - >> (b+d))/2 */\ >> - "psubw %%mm3, %%mm2 \n\t" /* H(a+e - >> (b+d))/2 */\ >> - "psraw $3, %%mm0 \n\t" /* L(a+e - >> (b+d))/16 */\ >> - "psraw $3, %%mm2 \n\t" /* H(a+e - >> (b+d))/16 */\ >> - "psubw %%mm0, %%mm1 \n\t" /* L(9b + 9d - a >> - e)/16 */\ >> - "psubw %%mm2, %%mm3 \n\t" /* H(9b + 9d - a >> - e)/16 */\ >> - "packuswb %%mm3, %%mm1 \n\t"\ >> - "movq %%mm1, " #c " \n\t" >> -#endif //TEMPLATE_PP_SSE2 >> #define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e) >> >> DEINT_CUBIC((%0) , (%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, >> %1, 4) , (%%FF_REGd, %1)) >> @@ -1562,13 +1232,11 @@ DEINT_CUBIC((%%FF_REGd, %1), (%0, %1, 8) , >> (%%FF_REGd, %1, 4), (%%FF_REGc) >> >> : : "r" (src), "r" ((x86_reg)stride) >> : >> -#if TEMPLATE_PP_SSE2 >> XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm7",) >> -#endif >> "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_c >> ); >> #undef REAL_DEINT_CUBIC >> -#else //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#else //TEMPLATE_PP_SSE2 >> int x; >> src+= stride*3; >> for(x=0; x<8; x++){ >> @@ -1578,7 +1246,7 @@ DEINT_CUBIC((%%FF_REGd, %1), (%0, %1, 8) , >> (%%FF_REGd, %1, 4), (%%FF_REGc) >> src[stride*9] = av_clip_uint8((-src[stride*6] + 9*src[stride*8] + >> 9*src[stride*10] - src[stride*12])>>4); >> src++; >> } >> -#endif //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#endif //TEMPLATE_PP_SSE2 >> } >> >> /** >> @@ -1590,7 +1258,7 @@ DEINT_CUBIC((%%FF_REGd, %1), (%0, %1, 8) , >> (%%FF_REGd, %1, 4), (%%FF_REGc) >> */ >> static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, >> uint8_t *tmp) >> { >> -#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#if TEMPLATE_PP_MMXEXT >> src+= stride*4; >> __asm__ volatile( >> "lea (%0, %1), %%"FF_REG_a" \n\t" >> @@ -1639,7 +1307,7 @@ DEINT_FF((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, >> %1, 8) , (%%FF_REGd, %1, 4 >> : : "r" (src), "r" ((x86_reg)stride), "r"(tmp) >> : "%"FF_REG_a, "%"FF_REG_d >> ); 
>> -#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#else //TEMPLATE_PP_MMXEXT >> int x; >> src+= stride*4; >> for(x=0; x<8; x++){ >> @@ -1657,7 +1325,7 @@ DEINT_FF((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, >> %1, 8) , (%%FF_REGd, %1, 4 >> >> src++; >> } >> -#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#endif //TEMPLATE_PP_MMXEXT >> } >> >> /** >> @@ -1669,7 +1337,7 @@ DEINT_FF((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, >> %1, 8) , (%%FF_REGd, %1, 4 >> */ >> static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, >> uint8_t *tmp, uint8_t *tmp2) >> { >> -#if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS >> +#if TEMPLATE_PP_MMXEXT && HAVE_6REGS >> src+= stride*4; >> __asm__ volatile( >> "lea (%0, %1), %%"FF_REG_a" \n\t" >> @@ -1729,7 +1397,7 @@ DEINT_L5(%%mm1, %%mm0, (%%FF_REGd, %1, 2), (%0, %1, >> 8) , (%%FF_REGd, %1, 4 >> : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2) >> : "%"FF_REG_a, "%"FF_REG_d >> ); >> -#else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS >> +#else //TEMPLATE_PP_MMXEXT && HAVE_6REGS >> int x; >> src+= stride*4; >> for(x=0; x<8; x++){ >> @@ -1758,7 +1426,7 @@ DEINT_L5(%%mm1, %%mm0, (%%FF_REGd, %1, 2), (%0, %1, >> 8) , (%%FF_REGd, %1, 4 >> >> src++; >> } >> -#endif //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS >> +#endif // TEMPLATE_PP_MMXEXT && HAVE_6REGS >> } >> >> /** >> @@ -1770,7 +1438,7 @@ DEINT_L5(%%mm1, %%mm0, (%%FF_REGd, %1, 2), (%0, %1, >> 8) , (%%FF_REGd, %1, 4 >> */ >> static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int >> stride, uint8_t *tmp) >> { >> -#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#if TEMPLATE_PP_MMXEXT >> src+= 4*stride; >> __asm__ volatile( >> "lea (%0, %1), %%"FF_REG_a" \n\t" >> @@ -1817,7 +1485,7 @@ static inline void >> RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin >> : : "r" (src), "r" ((x86_reg)stride), "r" (tmp) >> : "%"FF_REG_a, "%"FF_REG_d >> ); >> -#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#else 
//TEMPLATE_PP_MMXEXT >> int a, b, c, x; >> src+= 4*stride; >> >> @@ -1860,7 +1528,7 @@ static inline void >> RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin >> src += 4; >> tmp += 4; >> } >> -#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW >> +#endif //TEMPLATE_PP_MMXEXT >> } >> >> /** >> @@ -1871,9 +1539,8 @@ static inline void >> RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin >> */ >> static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) >> { >> -#if TEMPLATE_PP_MMX >> - src+= 4*stride; >> #if TEMPLATE_PP_MMXEXT >> + src+= 4*stride; >> __asm__ volatile( >> "lea (%0, %1), %%"FF_REG_a" \n\t" >> "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t" >> @@ -1922,48 +1589,6 @@ static inline void >> RENAME(deInterlaceMedian)(uint8_t src[], int stride) >> : "%"FF_REG_a, "%"FF_REG_d >> ); >> >> -#else // MMX without MMX2 >> - __asm__ volatile( >> - "lea (%0, %1), %%"FF_REG_a" \n\t" >> - "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t" >> -// 0 1 2 3 4 5 6 7 8 >> 9 >> -// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 >> %0+8%1 edx+4%1 >> - "pxor %%mm7, %%mm7 \n\t" >> - >> -#define REAL_MEDIAN(a,b,c)\ >> - "movq " #a ", %%mm0 \n\t"\ >> - "movq " #b ", %%mm2 \n\t"\ >> - "movq " #c ", %%mm1 \n\t"\ >> - "movq %%mm0, %%mm3 \n\t"\ >> - "movq %%mm1, %%mm4 \n\t"\ >> - "movq %%mm2, %%mm5 \n\t"\ >> - "psubusb %%mm1, %%mm3 \n\t"\ >> - "psubusb %%mm2, %%mm4 \n\t"\ >> - "psubusb %%mm0, %%mm5 \n\t"\ >> - "pcmpeqb %%mm7, %%mm3 \n\t"\ >> - "pcmpeqb %%mm7, %%mm4 \n\t"\ >> - "pcmpeqb %%mm7, %%mm5 \n\t"\ >> - "movq %%mm3, %%mm6 \n\t"\ >> - "pxor %%mm4, %%mm3 \n\t"\ >> - "pxor %%mm5, %%mm4 \n\t"\ >> - "pxor %%mm6, %%mm5 \n\t"\ >> - "por %%mm3, %%mm1 \n\t"\ >> - "por %%mm4, %%mm2 \n\t"\ >> - "por %%mm5, %%mm0 \n\t"\ >> - "pand %%mm2, %%mm0 \n\t"\ >> - "pand %%mm1, %%mm0 \n\t"\ >> - "movq %%mm0, " #b " \n\t" >> -#define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c) >> - >> -MEDIAN((%0) , (%%FF_REGa) , (%%FF_REGa, %1)) >> -MEDIAN((%%FF_REGa, %1), (%%FF_REGa, %1, 2), 
(%0, %1, 4)) >> -MEDIAN((%0, %1, 4) , (%%FF_REGd) , (%%FF_REGd, %1)) >> -MEDIAN((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8)) >> - >> - : : "r" (src), "r" ((x86_reg)stride) >> - : "%"FF_REG_a, "%"FF_REG_d >> - ); >> -#endif //TEMPLATE_PP_MMXEXT >> #else //TEMPLATE_PP_MMX >> int x, y; >> src+= 4*stride; >> @@ -2165,7 +1790,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t >> *src, int stride, >> >> #define FAST_L2_DIFF >> //#define L1_DIFF //u should change the thresholds too if u try that one >> -#if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS >> +#if TEMPLATE_PP_MMXEXT && HAVE_6REGS >> __asm__ volatile( >> "lea (%2, %2, 2), %%"FF_REG_a" \n\t" // 3*stride >> "lea (%2, %2, 4), %%"FF_REG_d" \n\t" // 5*stride >> @@ -2454,7 +2079,7 @@ L2_DIFF_CORE((%0, %%FF_REGc) , (%1, %%FF_REGc)) >> NAMED_CONSTRAINTS_ADD(b80) >> : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_c, "memory" >> ); >> -#else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS >> +#else //TEMPLATE_PP_MMXEXT && HAVE_6REGS >> { >> int y; >> int d=0; >> @@ -2537,11 +2162,11 @@ Switch between >> } >> } >> } >> -#endif //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS >> +#endif //TEMPLATE_PP_MMXEXT && HAVE_6REGS >> } >> #endif //TEMPLATE_PP_ALTIVEC >> >> -#if TEMPLATE_PP_MMX >> +#if TEMPLATE_PP_MMXEXT >> /** >> * accurate deblock filter >> */ >> @@ -2945,7 +2570,6 @@ static av_always_inline void >> RENAME(do_a_deblock)(uint8_t *src, int step, int st >> "movq (%4), %%mm2 \n\t" // 2L0 - 5L1 + >> 5L2 - 2L3 >> "movq 8(%4), %%mm3 \n\t" // 2H0 - 5H1 + >> 5H2 - 2H3 >> >> -#if TEMPLATE_PP_MMXEXT >> "movq %%mm7, %%mm6 \n\t" // 0 >> "psubw %%mm0, %%mm6 \n\t" >> "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + >> 5L6 - 2L7| >> @@ -2958,36 +2582,9 @@ static av_always_inline void >> RENAME(do_a_deblock)(uint8_t *src, int step, int st >> "movq %%mm7, %%mm6 \n\t" // 0 >> "psubw %%mm3, %%mm6 \n\t" >> "pmaxsw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + >> 5H2 - 2H3| >> -#else >> - "movq %%mm7, %%mm6 \n\t" // 0 >> - 
"pcmpgtw %%mm0, %%mm6 \n\t" >> - "pxor %%mm6, %%mm0 \n\t" >> - "psubw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + >> 5L6 - 2L7| >> - "movq %%mm7, %%mm6 \n\t" // 0 >> - "pcmpgtw %%mm1, %%mm6 \n\t" >> - "pxor %%mm6, %%mm1 \n\t" >> - "psubw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + >> 5H6 - 2H7| >> - "movq %%mm7, %%mm6 \n\t" // 0 >> - "pcmpgtw %%mm2, %%mm6 \n\t" >> - "pxor %%mm6, %%mm2 \n\t" >> - "psubw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + >> 5L2 - 2L3| >> - "movq %%mm7, %%mm6 \n\t" // 0 >> - "pcmpgtw %%mm3, %%mm6 \n\t" >> - "pxor %%mm6, %%mm3 \n\t" >> - "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + >> 5H2 - 2H3| >> -#endif >> >> -#if TEMPLATE_PP_MMXEXT >> "pminsw %%mm2, %%mm0 \n\t" >> "pminsw %%mm3, %%mm1 \n\t" >> -#else >> - "movq %%mm0, %%mm6 \n\t" >> - "psubusw %%mm2, %%mm6 \n\t" >> - "psubw %%mm6, %%mm0 \n\t" >> - "movq %%mm1, %%mm6 \n\t" >> - "psubusw %%mm3, %%mm6 \n\t" >> - "psubw %%mm6, %%mm1 \n\t" >> -#endif >> >> "movd %2, %%mm2 \n\t" // QP >> "punpcklbw %%mm7, %%mm2 \n\t" >> @@ -3041,17 +2638,8 @@ static av_always_inline void >> RENAME(do_a_deblock)(uint8_t *src, int step, int st >> "pand %%mm2, %%mm4 \n\t" >> "pand %%mm3, %%mm5 \n\t" >> >> -#if TEMPLATE_PP_MMXEXT >> "pminsw %%mm0, %%mm4 \n\t" >> "pminsw %%mm1, %%mm5 \n\t" >> -#else >> - "movq %%mm4, %%mm2 \n\t" >> - "psubusw %%mm0, %%mm2 \n\t" >> - "psubw %%mm2, %%mm4 \n\t" >> - "movq %%mm5, %%mm2 \n\t" >> - "psubusw %%mm1, %%mm2 \n\t" >> - "psubw %%mm2, %%mm5 \n\t" >> -#endif >> "pxor %%mm6, %%mm4 \n\t" >> "pxor %%mm7, %%mm5 \n\t" >> "psubw %%mm6, %%mm4 \n\t" >> @@ -3088,18 +2676,14 @@ static void RENAME(postProcess)(const uint8_t >> src[], int srcStride, uint8_t dst[ >> static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const >> uint8_t src[], int srcStride, >> int levelFix, int64_t >> *packedOffsetAndScale) >> { >> -#if !TEMPLATE_PP_MMX || !HAVE_6REGS >> - int i; >> -#endif >> if(levelFix){ >> -#if TEMPLATE_PP_MMX && HAVE_6REGS >> +#if TEMPLATE_PP_MMXEXT && HAVE_6REGS >> __asm__ volatile( >> "movq 
(%%"FF_REG_a"), %%mm2 \n\t" // packedYOffset >> "movq 8(%%"FF_REG_a"), %%mm3 \n\t" // packedYScale >> "lea (%2,%4), %%"FF_REG_a" \n\t" >> "lea (%3,%5), %%"FF_REG_d" \n\t" >> "pxor %%mm4, %%mm4 \n\t" >> -#if TEMPLATE_PP_MMXEXT >> #define REAL_SCALED_CPY(src1, src2, dst1, dst2) >> \ >> "movq " #src1 ", %%mm0 \n\t"\ >> "movq " #src1 ", %%mm5 \n\t"\ >> @@ -3122,34 +2706,6 @@ static inline void RENAME(blockCopy)(uint8_t dst[], >> int dstStride, const uint8_t >> "movq %%mm0, " #dst1 " \n\t"\ >> "movq %%mm1, " #dst2 " \n\t"\ >> >> -#else //TEMPLATE_PP_MMXEXT >> -#define REAL_SCALED_CPY(src1, src2, dst1, dst2) >> \ >> - "movq " #src1 ", %%mm0 \n\t"\ >> - "movq " #src1 ", %%mm5 \n\t"\ >> - "punpcklbw %%mm4, %%mm0 \n\t"\ >> - "punpckhbw %%mm4, %%mm5 \n\t"\ >> - "psubw %%mm2, %%mm0 \n\t"\ >> - "psubw %%mm2, %%mm5 \n\t"\ >> - "movq " #src2 ", %%mm1 \n\t"\ >> - "psllw $6, %%mm0 \n\t"\ >> - "psllw $6, %%mm5 \n\t"\ >> - "pmulhw %%mm3, %%mm0 \n\t"\ >> - "movq " #src2 ", %%mm6 \n\t"\ >> - "pmulhw %%mm3, %%mm5 \n\t"\ >> - "punpcklbw %%mm4, %%mm1 \n\t"\ >> - "punpckhbw %%mm4, %%mm6 \n\t"\ >> - "psubw %%mm2, %%mm1 \n\t"\ >> - "psubw %%mm2, %%mm6 \n\t"\ >> - "psllw $6, %%mm1 \n\t"\ >> - "psllw $6, %%mm6 \n\t"\ >> - "pmulhw %%mm3, %%mm1 \n\t"\ >> - "pmulhw %%mm3, %%mm6 \n\t"\ >> - "packuswb %%mm5, %%mm0 \n\t"\ >> - "packuswb %%mm6, %%mm1 \n\t"\ >> - "movq %%mm0, " #dst1 " \n\t"\ >> - "movq %%mm1, " #dst2 " \n\t"\ >> - >> -#endif //TEMPLATE_PP_MMXEXT >> #define SCALED_CPY(src1, src2, dst1, dst2)\ >> REAL_SCALED_CPY(src1, src2, dst1, dst2) >> >> @@ -3170,7 +2726,7 @@ SCALED_CPY((%%FF_REGa, %4), (%%FF_REGa, %4, 2), >> (%%FF_REGd, %5), (%%FF_REGd, %5, >> : "%"FF_REG_d >> ); >> #else //TEMPLATE_PP_MMX && HAVE_6REGS >> - for(i=0; i<8; i++) >> + for (int i = 0; i < 8; i++) >> memcpy( &(dst[dstStride*i]), >> &(src[srcStride*i]), BLOCK_SIZE); >> #endif //TEMPLATE_PP_MMX && HAVE_6REGS >> @@ -3203,7 +2759,7 @@ SIMPLE_CPY((%%FF_REGa, %2), (%%FF_REGa, %2, 2), >> (%%FF_REGd, %3), (%%FF_REGd, %3, >> : 
"%"FF_REG_a, "%"FF_REG_d >> ); >> #else //TEMPLATE_PP_MMX && HAVE_6REGS >> - for(i=0; i<8; i++) >> + for (int i = 0; i < 8; i++) >> memcpy( &(dst[dstStride*i]), >> &(src[srcStride*i]), BLOCK_SIZE); >> #endif //TEMPLATE_PP_MMX && HAVE_6REGS >> @@ -3696,9 +3252,7 @@ static void RENAME(postProcess)(const uint8_t src[], >> int srcStride, uint8_t dst[ >> } >> } >> } >> -#if TEMPLATE_PP_3DNOW >> - __asm__ volatile("femms"); >> -#elif TEMPLATE_PP_MMX >> +#if TEMPLATE_PP_MMX >> __asm__ volatile("emms"); >> #endif >> >> @@ -3734,5 +3288,4 @@ static void RENAME(postProcess)(const uint8_t src[], >> int srcStride, uint8_t dst[ >> #undef TEMPLATE_PP_ALTIVEC >> #undef TEMPLATE_PP_MMX >> #undef TEMPLATE_PP_MMXEXT >> -#undef TEMPLATE_PP_3DNOW >> #undef TEMPLATE_PP_SSE2 > > Will apply this patchset tomorrow unless there are objections. >
LGTM > - Andreas > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".