On 06/10/20 15:55 -0400, Daniel Lemire via Libstdc++ wrote:
The updated patch looks good to me. It is indeed cleaner to have a separate
(static) function.

It might be nice to add a comment to explain the _S_nd function maybe with
a comment like "returns a random value in [0,__range)
without any bias" (or something to that effect).

Otherwise, it is algorithmically correct.

Here's what I've just committed and pushed to the master branch.

As expected, this shows significant improvements for some (but not
all) of the cases in the new test I added earlier today,
testsuite/performance/26_numerics/random_dist.cc

Thanks again for the patch.


commit 98c37d3bacbb2f8bbbe56ed53a9547d3be01b66b
Author: Daniel Lemire <lem...@gmail.com>
Date:   Fri Oct 9 14:09:36 2020

    libstdc++: Optimize uniform_int_distribution using Lemire's algorithm
    
    Co-authored-by: Jonathan Wakely <jwak...@redhat.com>
    
    libstdc++-v3/ChangeLog:
    
            * include/bits/uniform_int_dist.h (uniform_int_distribution::_S_nd):
            New member function implementing Lemire's "nearly divisionless"
            algorithm.
            (uniform_int_distribution::operator()): Use _S_nd when the range
            of the URBG is the full width of the result type.

diff --git a/libstdc++-v3/include/bits/uniform_int_dist.h b/libstdc++-v3/include/bits/uniform_int_dist.h
index 6e1e3d5fc5f..ecb8574864a 100644
--- a/libstdc++-v3/include/bits/uniform_int_dist.h
+++ b/libstdc++-v3/include/bits/uniform_int_dist.h
@@ -234,6 +234,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 			const param_type& __p);
 
       param_type _M_param;
+
+      // Lemire's nearly divisionless algorithm.
+      // Returns an unbiased random number from __g downscaled to [0,__range)
+      // using an unsigned type _Wp twice as wide as unsigned type _Up.
+      template<typename _Wp, typename _Urbg, typename _Up>
+	static _Up
+	_S_nd(_Urbg& __g, _Up __range)
+	{
+	  using __gnu_cxx::__int_traits;
+	  static_assert(!__int_traits<_Up>::__is_signed, "U must be unsigned");
+	  static_assert(!__int_traits<_Wp>::__is_signed, "W must be unsigned");
+
+	  // reference: Fast Random Integer Generation in an Interval
+	  // ACM Transactions on Modeling and Computer Simulation 29 (1), 2019
+	  // https://arxiv.org/abs/1805.10941
+	  _Wp __product = _Wp(__g()) * _Wp(__range);
+	  _Up __low = _Up(__product);
+	  if (__low < __range)
+	    {
+	      _Up __threshold = -__range % __range;
+	      while (__low < __threshold)
+		{
+		  __product = _Wp(__g()) * _Wp(__range);
+		  __low = _Up(__product);
+		}
+	    }
+	  return __product >> __gnu_cxx::__int_traits<_Up>::__digits;
+	}
     };
 
   template<typename _IntType>
@@ -256,17 +284,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  = __uctype(__param.b()) - __uctype(__param.a());
 
 	__uctype __ret;
-
 	if (__urngrange > __urange)
 	  {
 	    // downscaling
+
 	    const __uctype __uerange = __urange + 1; // __urange can be zero
-	    const __uctype __scaling = __urngrange / __uerange;
-	    const __uctype __past = __uerange * __scaling;
-	    do
-	      __ret = __uctype(__urng()) - __urngmin;
-	    while (__ret >= __past);
-	    __ret /= __scaling;
+
+	    using __gnu_cxx::__int_traits;
+#if __SIZEOF_INT128__
+	    if (__int_traits<__uctype>::__digits == 64
+		&& __urngrange == __int_traits<__uctype>::__max)
+	      {
+		__ret = _S_nd<unsigned __int128>(__urng, __uerange);
+	      }
+	    else
+#endif
+	    if (__int_traits<__uctype>::__digits == 32
+		&& __urngrange == __int_traits<__uctype>::__max)
+	      {
+		__ret = _S_nd<__UINT64_TYPE__>(__urng, __uerange);
+	      }
+	    else
+	      {
+		// fallback case (2 divisions)
+		const __uctype __scaling = __urngrange / __uerange;
+		const __uctype __past = __uerange * __scaling;
+		do
+		  __ret = __uctype(__urng()) - __urngmin;
+		while (__ret >= __past);
+		__ret /= __scaling;
+	      }
 	  }
 	else if (__urngrange < __urange)
 	  {

Reply via email to