This patch is to optimize the runtime execution of gcd. Mathematically,
it computes with the same algorithm as before, but subtractions and
branches are rearranged to encourage generation of code that can use
flags from the subtractions for conditional moves. Additionally, most
pairs of integers are coprime, so this patch also includes a check for
one of the integers to be equal to 1, and then it will exit the loop
early in this case.

libstdc++-v3/ChangeLog:

        * include/std/numeric(__gcd): Optimize.
---
I have tested this on x86_64-linux and aarch64-linux. I have tested the
timing with random distributions of small inputs and large inputs on a
couple of machines with -O2 and found decreases in execution time from
20% to 60% depending on the machine and distribution of inputs.

 libstdc++-v3/include/std/numeric | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/libstdc++-v3/include/std/numeric b/libstdc++-v3/include/std/numeric
index c912db4a519..3c9e8387a0e 100644
--- a/libstdc++-v3/include/std/numeric
+++ b/libstdc++-v3/include/std/numeric
@@ -148,19 +148,20 @@ namespace __detail
 
       while (true)
        {
-         if (__m > __n)
-           {
-             _Tp __tmp = __m;
-             __m = __n;
-             __n = __tmp;
-           }
+         _Tp __m_minus_n = __m - __n;
+         if (__m_minus_n == 0)
+           return __m << __k;
 
-         __n -= __m;
+         _Tp __next_m = __m < __n ? __m : __n;
 
-         if (__n == 0)
-           return __m << __k;
+         if (__next_m == 1)
+           return __next_m << __k;
+
+         _Tp __n_minus_m = __n - __m;
+         __n = __n < __m ? __m_minus_n : __n_minus_m;
+         __m = __next_m;
 
-         __n >>= std::__countr_zero(__n);
+         __n >>= std::__countr_zero(__m_minus_n);
        }
     }
 } // namespace __detail
-- 
2.45.2

Reply via email to