ffmpeg | branch: master | Clément Bœsch <u...@pkh.me> | Sat Aug 23 20:03:10 2014 +0200| [554d8190624f25cefe079bd7b9ad61a2ade8541a] | committer: Clément Bœsch
avutil/pixelutils: faster pixelutils_sad_16x16

501 to 439 decicycles.

See 45c7f3997ea11c3d1007b2126b1c0049a8c27105.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=554d8190624f25cefe079bd7b9ad61a2ade8541a
---

 libavutil/x86/pixelutils.asm | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/libavutil/x86/pixelutils.asm b/libavutil/x86/pixelutils.asm
index 15213d9..7522f24 100644
--- a/libavutil/x86/pixelutils.asm
+++ b/libavutil/x86/pixelutils.asm
@@ -109,18 +109,24 @@ cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2
 ;-------------------------------------------------------------------------------
 INIT_XMM sse2
 cglobal pixelutils_sad_16x16, 4,4,5, src1, stride1, src2, stride2
-    pxor        m4, m4
-%rep 8
-    movu        m0, [src1q]
+    movu        m4, [src1q]
+    movu        m2, [src2q]
     movu        m1, [src1q + stride1q]
+    movu        m3, [src2q + stride2q]
+    psadbw      m4, m2
+    psadbw      m1, m3
+    paddw       m4, m1
+%rep 7
+    lea         src1q, [src1q + 2*stride1q]
+    lea         src2q, [src2q + 2*stride2q]
+    movu        m0, [src1q]
     movu        m2, [src2q]
+    movu        m1, [src1q + stride1q]
     movu        m3, [src2q + stride2q]
     psadbw      m0, m2
     psadbw      m1, m3
     paddw       m4, m0
     paddw       m4, m1
-    lea         src1q, [src1q + 2*stride1q]
-    lea         src2q, [src2q + 2*stride2q]
 %endrep
     movhlps     m0, m4
     paddw       m4, m0

_______________________________________________
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog