The loop count is guaranteed to be a multiple of 8 (at least), so unrolling the loops by two is safe and allows better use of the CPU's execution ports.
For int32 version: 68 -> 58c. --- libavcodec/lossless_audiodsp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/libavcodec/lossless_audiodsp.c b/libavcodec/lossless_audiodsp.c index ea0568e..e3ea8e1 100644 --- a/libavcodec/lossless_audiodsp.c +++ b/libavcodec/lossless_audiodsp.c @@ -29,10 +29,12 @@ static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, { int res = 0; - while (order--) { + do { res += *v1 * *v2++; *v1++ += mul * *v3++; - } + res += *v1 * *v2++; + *v1++ += mul * *v3++; + } while (order-=2); return res; } @@ -42,10 +44,12 @@ static int32_t scalarproduct_and_madd_int32_c(int16_t *v1, const int32_t *v2, { int res = 0; - while (order--) { + do { + res += *v1 * *v2++; + *v1++ += mul * *v3++; res += *v1 * *v2++; *v1++ += mul * *v3++; - } + } while (order-=2); return res; } -- 2.8.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel