ffmpeg | branch: master | James Almer <jamr...@gmail.com> | Thu Jun 15 23:20:05 2017 -0300| [b3446862bfdbfc8c500c052e0aa48674c1d9ca9f] | committer: James Almer
x86/vorbisdsp: optimize ff_vorbis_inverse_coupling_sse

About 7% faster.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b3446862bfdbfc8c500c052e0aa48674c1d9ca9f
---

 libavcodec/x86/vorbisdsp.asm | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/libavcodec/x86/vorbisdsp.asm b/libavcodec/x86/vorbisdsp.asm
index b25d838868..d952296716 100644
--- a/libavcodec/x86/vorbisdsp.asm
+++ b/libavcodec/x86/vorbisdsp.asm
@@ -57,13 +57,17 @@ cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
 %endif
 
 INIT_XMM sse
-cglobal vorbis_inverse_coupling, 3, 4, 6, mag, ang, block_size, cntr
+cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
     mova                     m5, [pdw_80000000]
-    xor                   cntrq, cntrq
+    shl             block_sized, 2
+    add                    magq, block_sizeq
+    add                    angq, block_sizeq
+    neg             block_sizeq
+
 align 16
 .loop:
-    mova                     m0, [magq+cntrq*4]
-    mova                     m1, [angq+cntrq*4]
+    mova                     m0, [magq+block_sizeq]
+    mova                     m1, [angq+block_sizeq]
     xorps                    m2, m2
     xorps                    m3, m3
     cmpleps                  m2, m0     ; m <= 0.0
@@ -75,9 +79,8 @@ align 16
     andnps                   m4, m1
     addps                    m3, m0     ; a = m + ((a < 0) & (a ^ sign(m)))
     subps                    m0, m4     ; m = m + ((a > 0) & (a ^ sign(m)))
-    mova         [angq+cntrq*4], m3
-    mova         [magq+cntrq*4], m0
-    add                   cntrq, 4
-    cmp                   cntrq, block_sizeq
+    mova     [angq+block_sizeq], m3
+    mova     [magq+block_sizeq], m0
+    add             block_sizeq, mmsize
     jl .loop
     RET

_______________________________________________
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog