On Thu, 22 Jan 2015 01:38:11 +0530 arwa arif <arwaarif1...@gmail.com> wrote:
> From 703cc1887903c2868537e19e99b76927bec07884 Mon Sep 17 00:00:00 2001 > From: Arwa Arif <arwaarif1...@gmail.com> > Date: Mon, 19 Jan 2015 03:56:48 +0530 > Subject: [PATCH] Port mp=eq/eq2 to FFmpeg > > Code adapted from James Darnley's previous commits > [...] > +#if HAVE_MMX && HAVE_6REGS > +static void process_MMX(EQParameters *param, uint8_t *dst, int dst_stride, > + uint8_t *src, int src_stride, int w, int h) > +{ > + int i; > + int pel; > + int dstep = dst_stride - w; > + int sstep = src_stride - w; > + short brvec[4]; > + short contvec[4]; > + > + brvec[0] = brvec[1] = brvec[2] = brvec[3] = param->b; > + contvec[0] = contvec[1] = contvec[2] = contvec[3] = param->c; > + > + while (h--) { > + __asm__ volatile ( > + "movq (%5), %%mm3 \n\t" > + "movq (%6), %%mm4 \n\t" > + "pxor %%mm0, %%mm0 \n\t" > + "movl %4, %%eax \n\t" > + //ASMALIGN(4) > + "1: \n\t" > + "movq (%0), %%mm1 \n\t" > + "movq (%0), %%mm2 \n\t" > + "punpcklbw %%mm0, %%mm1\n\t" > + "punpckhbw %%mm0, %%mm2\n\t" > + "psllw $4, %%mm1 \n\t" > + "psllw $4, %%mm2 \n\t" > + "pmulhw %%mm4, %%mm1 \n\t" > + "pmulhw %%mm4, %%mm2 \n\t" > + "paddw %%mm3, %%mm1 \n\t" > + "paddw %%mm3, %%mm2 \n\t" > + "packuswb %%mm2, %%mm1 \n\t" > + "add $8, %0 \n\t" > + "movq %%mm1, (%1) \n\t" > + "add $8, %1 \n\t" > + "decl %%eax \n\t" > + "jnz 1b \n\t" > + : "=r" (src), "=r" (dst) > + : "0" (src), "1" (dst), "r" (w>>3), "r" (brvec), "r" > (contvec) > + : "%eax" > + ); > + > + for (i = w&7; i; i--) { > + pel = ((*src++ * param->c) >> 12) + param->b; > + if (pel & 768) > + pel = (-pel) >> 31; > + *dst++ = pel; > + } > + > + src += sstep; > + dst += dstep; > + } > + __asm__ volatile ( "emms \n\t" ::: "memory" ); > +} > +#endif IMO the asm shouldn't be added at all. It's not important enough for a ported video filter, but has high maintenance overhead - there's a reason the FFmpeg loathes inline asm, right? How much does the inline asm help with speed? When I tested it on a reasonably modern CPU a while ago, there was barely any advantage. > +av_cold void ff_eq_init_x86(EQContext *eq) > +{ > +#if HAVE_MMX_INLINE > + int cpu_flags = av_get_cpu_flags(); > + > + if (cpu_flags & AV_CPU_FLAG_MMX) { > + eq->process = process_MMX; > + } > +#endif > +} _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel