On Thu, Dec 19, 2019 at 11:35:51AM +0800, Ting Fu wrote: > Tested using this command: > ./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \ > -vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null >
> The fps increase from 151 to 389 on my local machine. Thats nice but why is there such a difference from changing the way the code is assembled ? This should definitly be explained more detailedly in the commit message > > Signed-off-by: Ting Fu <ting...@intel.com> > --- > libswscale/x86/Makefile | 1 + > libswscale/x86/swscale.c | 16 +- > libswscale/x86/yuv2rgb.c | 81 +++--- > libswscale/x86/yuv2rgb_template.c | 441 ++++++------------------------ > libswscale/x86/yuv_2_rgb.asm | 270 ++++++++++++++++++ > 5 files changed, 395 insertions(+), 414 deletions(-) > create mode 100644 libswscale/x86/yuv_2_rgb.asm > > diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile > index f317d5dd9b..831d5359aa 100644 > --- a/libswscale/x86/Makefile > +++ b/libswscale/x86/Makefile > @@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o > \ > x86/output.o \ > x86/scale.o \ > x86/rgb_2_rgb.o \ > + x86/yuv_2_rgb.o \ > diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c > index 0eed4f18d5..e9d474a1e8 100644 > --- a/libswscale/x86/swscale.c > +++ b/libswscale/x86/swscale.c > @@ -29,6 +29,14 @@ > #include "libavutil/cpu.h" > #include "libavutil/pixdesc.h" > > +const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { > + 0x0103010301030103LL, > + 0x0200020002000200LL,}; > + > +const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { > + 0x0602060206020602LL, > + 0x0004000400040004LL,}; > + > #if HAVE_INLINE_ASM > > #define DITHER1XBPP > @@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)= > 0xFCFCFCFCFCFCFCFCLL; > DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; > DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; > > -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { > - 0x0103010301030103LL, > - 0x0200020002000200LL,}; > - > -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { > - 0x0602060206020602LL, > - 0x0004000400040004LL,}; > - > DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; > DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; > DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; > diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c > index 5e2f77c20f..ed9b613cab 100644 > --- a/libswscale/x86/yuv2rgb.c > +++ b/libswscale/x86/yuv2rgb.c > @@ -37,7 +37,7 @@ > #include "libavutil/x86/cpu.h" > #include "libavutil/cpu.h" > > -#if HAVE_INLINE_ASM > +#if HAVE_X86ASM > > #define DITHER1XBPP // only for MMX > > @@ -50,70 +50,51 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = > 0x0303030303030303ULL; > DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; > > //MMX versions > -#if HAVE_MMX_INLINE && HAVE_6REGS > -#undef RENAME > +#if HAVE_MMX > #undef COMPILE_TEMPLATE_MMXEXT > #define COMPILE_TEMPLATE_MMXEXT 0 > -#define RENAME(a) a ## _mmx > -#include "yuv2rgb_template.c" > -#endif /* HAVE_MMX_INLINE && HAVE_6REGS */ > +#endif /* HAVE_MMX */ > > // MMXEXT versions > -#if HAVE_MMXEXT_INLINE && HAVE_6REGS > -#undef RENAME > +#if HAVE_MMXEXT > #undef COMPILE_TEMPLATE_MMXEXT > #define COMPILE_TEMPLATE_MMXEXT 1 > -#define RENAME(a) a ## _mmxext > -#include "yuv2rgb_template.c" > -#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */ > +#endif /* HAVE_MMXEXT */ > > -#endif /* HAVE_INLINE_ASM */ > +#include "yuv2rgb_template.c" > > av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) > { > -#if HAVE_MMX_INLINE && HAVE_6REGS > int cpu_flags = av_get_cpu_flags(); > > -#if HAVE_MMXEXT_INLINE > - if (INLINE_MMXEXT(cpu_flags)) { > - switch (c->dstFormat) { > - case AV_PIX_FMT_RGB24: > - return yuv420_rgb24_mmxext; > - case AV_PIX_FMT_BGR24: > - return yuv420_bgr24_mmxext; > - } > - } > -#endif > - > - if (INLINE_MMX(cpu_flags)) { > + if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) { i would expect EXTERNAL_MMXEXT to imply EXTERNAL_MMX [...] > + case AV_PIX_FMT_RGB24: > + return yuv420_rgb24; [...] > +static inline int yuv420_rgb24(SwsContext *c, const uint8_t *src[], > + int srcStride[], > + int srcSliceY, int srcSliceH, > + uint8_t *dst[], int dstStride[]) > +{ This looks a bit odd, a inline function which only use is taking a pointer to it and returning it [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Whats the most studid thing your enemy could do ? Blow himself up Whats the most studid thing you could do ? Give up your rights and freedom because your enemy blew himself up.
signature.asc
Description: PGP signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".