On 05/02/15 4:20 PM, Christophe Gisquet wrote: > From: Mickaël Raulet <mrau...@insa-rennes.fr> > > Conflicts: > libavcodec/x86/hevc_mc.asm > --- > libavcodec/x86/hevc_mc.asm | 12 ++++-------- > 1 file changed, 4 insertions(+), 8 deletions(-) > > diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm > index efb4d1f..e8a5032 100644 > --- a/libavcodec/x86/hevc_mc.asm > +++ b/libavcodec/x86/hevc_mc.asm > @@ -665,11 +665,9 @@ QPEL_TABLE 10, 8, w, avx2 > %if %2 == 8 > packuswb %3, %4 > %else > - pminsw %3, [max_pixels_%2] > - pmaxsw %3, [zero] > + CLIPW %3, [zero], [max_pixels_%2] > %if (%1 > 8 && notcpuflag(avx)) || %1 > 16 > - pminsw %4, [max_pixels_%2] > - pmaxsw %4, [zero] > + CLIPW %4, [zero], [max_pixels_%2]
Many (but not all) of the functions calling these macros have free regs where max_pixels_%2 and zero (or, for zero, simply the result of a pxor m*, m*) could be stored. Keeping them in registers would probably be faster than reloading these constants inside a loop. But again, that's for a different patch. > %endif > %endif > %endmacro > @@ -1467,8 +1465,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, > dststride, src, srcstride, heigh > %if %2 == 8 > packuswb m0, m0 > %else > - pminsw m0, [max_pixels_%2] > - pmaxsw m0, [zero] > + CLIPW m0, [zero], [max_pixels_%2] > %endif > PEL_%2STORE%1 dstq, m0, m1 > add dstq, dststrideq ; dst += dststride > @@ -1539,8 +1536,7 @@ cglobal hevc_put_hevc_bi_w%1_%2, 5, 7, 10, dst, > dststride, src, srcstride, src2, > %if %2 == 8 > packuswb m0, m0 > %else > - pminsw m0, [max_pixels_%2] > - pmaxsw m0, [zero] > + CLIPW m0, [zero], [max_pixels_%2] > %endif > PEL_%2STORE%1 dstq, m0, m1 > add dstq, dststrideq ; dst += dststride > lgtm otherwise. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel