This patch is to optimize the runtime execution of gcd. Mathematically, it computes with the same algorithm as before, but subtractions and branches are rearranged to encourage generation of code that can use flags from the subtractions for conditional moves. Additionally, most pairs of integers are coprime, so this patch also includes a check for one of the integers to be equal to 1, and then it will exit the loop early in this case.
libstdc++-v3/ChangeLog: * include/std/numeric(__gcd): Optimize. --- I have tested this on x86_64-linux and aarch64-linux. I have tested the timing with random distributions of small inputs and large inputs on a couple of machines with -O2 and found decreases in execution time from 20% to 60% depending on the machine and distribution of inputs. libstdc++-v3/include/std/numeric | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/libstdc++-v3/include/std/numeric b/libstdc++-v3/include/std/numeric index c912db4a519..3c9e8387a0e 100644 --- a/libstdc++-v3/include/std/numeric +++ b/libstdc++-v3/include/std/numeric @@ -148,19 +148,20 @@ namespace __detail while (true) { - if (__m > __n) - { - _Tp __tmp = __m; - __m = __n; - __n = __tmp; - } + _Tp __m_minus_n = __m - __n; + if (__m_minus_n == 0) + return __m << __k; - __n -= __m; + _Tp __next_m = __m < __n ? __m : __n; - if (__n == 0) - return __m << __k; + if (__next_m == 1) + return __next_m << __k; + + _Tp __n_minus_m = __n - __m; + __n = __n < __m ? __m_minus_n : __n_minus_m; + __m = __next_m; - __n >>= std::__countr_zero(__n); + __n >>= std::__countr_zero(__m_minus_n); } } } // namespace __detail -- 2.45.2