This uses Stein's binary GCD algorithm: https://en.wikipedia.org/wiki/Binary_GCD_algorithm to get a reported 1.7-2.1x speedup over Euclidean GCD on standard architectures. Have not benchmarked, so I can't comment. Note that there may be some room for optimization of the bit operations.
Quick note regarding overflow: yes, subtractions on int64_t can, but the llabs takes care of that. The llabs is also guaranteed to be safe, with no annoying INT64_MIN business since INT64_MIN being a power of 2, is shifted down before being sent to llabs. The binary GCD needs ff_ctzll, an extension of ff_ctz for long long (int64_t). On GCC, this is provided by a built-in. On Microsoft, there is a BitScanForward64 analog of BitScanForward that should work; but I can't confirm. Apparently it is not available on 32 bit builds; so this may or may not work correctly. On Intel, per the documentation there is only an intrinsic for _bit_scan_forward and people have posted on forums regarding _bit_scan_forward64, but often their documentation is woeful. Again, I don't have it, so I can't test. As such, to be safe, for now only the GCC intrinsic is added, the rest use a compiled version. Tested with FATE. Signed-off-by: Ganesh Ajjanagadde <gajjanaga...@gmail.com> --- libavutil/intmath.h | 39 +++++++++++++++++++++++++++++++++++++++ libavutil/mathematics.c | 26 +++++++++++++++++++++----- 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/libavutil/intmath.h b/libavutil/intmath.h index 08d54a6..63657d9 100644 --- a/libavutil/intmath.h +++ b/libavutil/intmath.h @@ -114,6 +114,9 @@ static av_always_inline av_const int ff_log2_16bit_c(unsigned int v) #ifndef ff_ctz #define ff_ctz(v) __builtin_ctz(v) #endif +#ifndef ff_ctzll +#define ff_ctzll(v) __builtin_ctzll(v) +#endif #endif #endif @@ -158,6 +161,42 @@ static av_always_inline av_const int ff_ctz_c( int v ) #endif #endif +#ifndef ff_ctzll +#define ff_ctzll ff_ctzll_c +static av_always_inline av_const int ff_ctzll_c(long long v) +{ + long long c; + + if (v & 0x1) + return 0; + + c = 1; + if (!(v & 0xffffffff)) { + v >>= 32; + c += 32; + } + if (!(v & 0xffff)) { + v >>= 16; + c += 16; + } + if (!(v & 0xff)) { + v >>= 8; + c += 8; + } + if (!(v & 0xf)) { + v >>= 4; + c += 4; + } + if (!(v & 0x3)) { + v >>= 2; + c += 2; + } + c -= v & 0x1; + + return c; +} +#endif + /** * Trailing zero bit count. * diff --git a/libavutil/mathematics.c b/libavutil/mathematics.c index 252794e..16e4eba 100644 --- a/libavutil/mathematics.c +++ b/libavutil/mathematics.c @@ -27,16 +27,32 @@ #include <limits.h> #include "mathematics.h" +#include "libavutil/intmath.h" #include "libavutil/common.h" #include "avassert.h" #include "version.h" -int64_t av_gcd(int64_t a, int64_t b) -{ - if (b) - return av_gcd(b, a % b); - else +/* Stein's binary GCD algorithm: + * https://en.wikipedia.org/wiki/Binary_GCD_algorithm */ +int64_t av_gcd(int64_t a, int64_t b) { + int za, zb, k; + int64_t u, v; + if (a == 0) + return b; + if (b == 0) return a; + za = ff_ctzll(a); + zb = ff_ctzll(b); + k = FFMIN(za, zb); + u = llabs(a >> za); + v = llabs(b >> zb); + while (u != v) { + if (u > v) + FFSWAP(int64_t, v, u); + v -= u; + v >>= ff_ctzll(v); + } + return u << k; } int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd) -- 2.6.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel