On Fri, Jan 03, 2020 at 06:59:28AM +0000, Fu, Ting wrote: > > > > -----Original Message----- > > From: ffmpeg-devel <ffmpeg-devel-boun...@ffmpeg.org> On Behalf Of > > Michael Niedermayer > > Sent: Friday, December 27, 2019 07:38 PM > > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> > > Subject: Re: [FFmpeg-devel] [PATCH V4 1/2] libswscale/x86/yuv2rgb: Change > > inline assembly into nasm code > > > > On Thu, Dec 19, 2019 at 11:35:51AM +0800, Ting Fu wrote: > > > Tested using this command: > > > ./ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \ -vcodec > > > rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null > > > > > > > > The fps increase from 151 to 389 on my local machine. > > > > Thats nice but why is there such a difference from changing the way the > > code is > > assembled ? > > This should definitly be explained more detailedly in the commit message > > > Hi, Michael > > The fps increasing means mmx compared to C code, not inline compared nasm > one. I will remove it from the commit message next patch version.
please test apples against apples, a benchmark of the inline vs NASM code certainly cannot hurt. Testing unoptimized vs new optimized code is not interresting. Testing old optimized vs new optimized code is interresting > > > > > > > > > Signed-off-by: Ting Fu <ting...@intel.com> > > > --- > > > libswscale/x86/Makefile | 1 + > > > libswscale/x86/swscale.c | 16 +- > > > libswscale/x86/yuv2rgb.c | 81 +++--- > > > libswscale/x86/yuv2rgb_template.c | 441 ++++++------------------------ > > > libswscale/x86/yuv_2_rgb.asm | 270 ++++++++++++++++++ > > > 5 files changed, 395 insertions(+), 414 deletions(-) create mode > > > 100644 libswscale/x86/yuv_2_rgb.asm > > > > > > diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile index > > > f317d5dd9b..831d5359aa 100644 > > > --- a/libswscale/x86/Makefile > > > +++ b/libswscale/x86/Makefile > > > @@ -12,3 +12,4 @@ X86ASM-OBJS += x86/input.o > > > \ > > > x86/output.o \ > > > x86/scale.o \ > > > x86/rgb_2_rgb.o \ > > > + x86/yuv_2_rgb.o \ > > > diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index > > > 0eed4f18d5..e9d474a1e8 100644 > > > --- a/libswscale/x86/swscale.c > > > +++ b/libswscale/x86/swscale.c > > > @@ -29,6 +29,14 @@ > > > #include "libavutil/cpu.h" > > > #include "libavutil/pixdesc.h" > > > > > > +const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { > > > + 0x0103010301030103LL, > > > + 0x0200020002000200LL,}; > > > + > > > +const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { > > > + 0x0602060206020602LL, > > > + 0x0004000400040004LL,}; > > > + > > > #if HAVE_INLINE_ASM > > > > > > #define DITHER1XBPP > > > @@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)= > > 0xFCFCFCFCFCFCFCFCLL; > > > DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL; > > > DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL; > > > > > > -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = { > > > - 0x0103010301030103LL, > > > - 0x0200020002000200LL,}; > > > - > > > -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = { > > > - 0x0602060206020602LL, > > > - 0x0004000400040004LL,}; > > > - > > > DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL; > > > DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL; > > > DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL; > > > diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index > > > 5e2f77c20f..ed9b613cab 100644 > > > --- a/libswscale/x86/yuv2rgb.c > > > +++ b/libswscale/x86/yuv2rgb.c > > > @@ -37,7 +37,7 @@ > > > #include "libavutil/x86/cpu.h" > > > #include "libavutil/cpu.h" > > > > > > -#if HAVE_INLINE_ASM > > > +#if HAVE_X86ASM > > > > > > #define DITHER1XBPP // only for MMX > > > > > > @@ -50,70 +50,51 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = > > > 0x0303030303030303ULL; DECLARE_ASM_CONST(8, uint64_t, pb_07) = > > > 0x0707070707070707ULL; > > > > > > //MMX versions > > > -#if HAVE_MMX_INLINE && HAVE_6REGS > > > -#undef RENAME > > > +#if HAVE_MMX > > > #undef COMPILE_TEMPLATE_MMXEXT > > > #define COMPILE_TEMPLATE_MMXEXT 0 > > > -#define RENAME(a) a ## _mmx > > > -#include "yuv2rgb_template.c" > > > -#endif /* HAVE_MMX_INLINE && HAVE_6REGS */ > > > +#endif /* HAVE_MMX */ > > > > > > // MMXEXT versions > > > -#if HAVE_MMXEXT_INLINE && HAVE_6REGS > > > -#undef RENAME > > > +#if HAVE_MMXEXT > > > #undef COMPILE_TEMPLATE_MMXEXT > > > #define COMPILE_TEMPLATE_MMXEXT 1 > > > -#define RENAME(a) a ## _mmxext > > > -#include "yuv2rgb_template.c" > > > -#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */ > > > +#endif /* HAVE_MMXEXT */ > > > > > > -#endif /* HAVE_INLINE_ASM */ > > > +#include "yuv2rgb_template.c" > > > > > > av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c) { -#if > > > HAVE_MMX_INLINE && HAVE_6REGS > > > int cpu_flags = av_get_cpu_flags(); > > > > > > -#if HAVE_MMXEXT_INLINE > > > - if (INLINE_MMXEXT(cpu_flags)) { > > > - switch (c->dstFormat) { > > > - case AV_PIX_FMT_RGB24: > > > - return yuv420_rgb24_mmxext; > > > - case AV_PIX_FMT_BGR24: > > > - return yuv420_bgr24_mmxext; > > > - } > > > - } > > > -#endif > > > - > > > - if (INLINE_MMX(cpu_flags)) { > > > > > + if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) { > > > > i would expect EXTERNAL_MMXEXT to imply EXTERNAL_MMX > > > > I was thinking the mmx-only processor. Under this circumstance, the mmx-only > processor will not be accelerated. Should that be OK? Or it means I will be > OK for not care much about old mmx-only processor in following patches? no If MMXEXT implies MMX then MMX == (MMX || MMXEXT) thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB I have often repented speaking, but never of holding my tongue. -- Xenocrates
signature.asc
Description: PGP signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".