Re: [PATCH v4 1/3] random: add rte_drand() function

Stephen Hemminger Thu, 26 May 2022 13:20:07 -0700

On Thu, 26 May 2022 15:20:29 +0200
Mattias Rönnblom <[email protected]> wrote:


> On 2022-05-25 22:31, Stephen Hemminger wrote:
> > The PIE code and other applications can benefit from having a
> > fast way to get a random floating point value. This new function
> > is equivalent to drand48() in the standard library.
> > 
> > Signed-off-by: Stephen Hemminger <[email protected]>
> > ---
> >   app/test/test_rand_perf.c              |  7 +++++
> >   doc/guides/rel_notes/release_22_07.rst |  5 ++++
> >   lib/eal/common/rte_random.c            | 41 ++++++++++++++++++++++++++
> >   lib/eal/include/rte_random.h           | 18 +++++++++++
> >   lib/eal/meson.build                    |  3 ++
> >   lib/eal/version.map                    |  1 +
> >   6 files changed, 75 insertions(+)
> > 
> > diff --git a/app/test/test_rand_perf.c b/app/test/test_rand_perf.c
> > index fe797ebfa1ca..26fb1d9a586e 100644
> > --- a/app/test/test_rand_perf.c
> > +++ b/app/test/test_rand_perf.c
> > @@ -20,6 +20,7 @@ static volatile uint64_t vsum;
> >   
> >   enum rand_type {
> >     rand_type_64,
> > +   rand_type_float,
> >     rand_type_bounded_best_case,
> >     rand_type_bounded_worst_case
> >   };
> > @@ -30,6 +31,8 @@ rand_type_desc(enum rand_type rand_type)
> >     switch (rand_type) {
> >     case rand_type_64:
> >             return "Full 64-bit [rte_rand()]";
> > +   case rand_type_float:
> > +           return "Floating point [rte_drand()]";
> >     case rand_type_bounded_best_case:
> >             return "Bounded average best-case [rte_rand_max()]";
> >     case rand_type_bounded_worst_case:
> > @@ -55,6 +58,9 @@ test_rand_perf_type(enum rand_type rand_type)
> >             case rand_type_64:
> >                     sum += rte_rand();
> >                     break;
> > +           case rand_type_float:
> > +                   sum += 1000. * rte_drand();  
> 
> Including this floating point multiplication will lead to an 
> overestimation of rte_drand() latency.
> 
> You could refactor this function to be a macro, and pass the return type 
> as a parameter to this macro. I did just that, and on both an AMD 
> 5900X and a Cortex-A72 it didn't add more than ~5%, so I don't think 
> it's necessary.
> 
> > +                   break;
> >             case rand_type_bounded_best_case:
> >                     sum += rte_rand_max(BEST_CASE_BOUND);
> >                     break;
> > @@ -83,6 +89,7 @@ test_rand_perf(void)
> >     printf("Pseudo-random number generation latencies:\n");
> >   
> >     test_rand_perf_type(rand_type_64);
> > +   test_rand_perf_type(rand_type_float);
> >     test_rand_perf_type(rand_type_bounded_best_case);
> >     test_rand_perf_type(rand_type_bounded_worst_case);
> >   
> > diff --git a/doc/guides/rel_notes/release_22_07.rst 
> > b/doc/guides/rel_notes/release_22_07.rst
> > index e49cacecefd4..b131ea577226 100644
> > --- a/doc/guides/rel_notes/release_22_07.rst
> > +++ b/doc/guides/rel_notes/release_22_07.rst
> > @@ -104,6 +104,11 @@ New Features
> >     * ``RTE_EVENT_QUEUE_ATTR_WEIGHT``
> >     * ``RTE_EVENT_QUEUE_ATTR_AFFINITY``
> >   
> > +* **Added function to get a random floating point number.**
> > +
> > +  Added the function ``rte_drand()`` to provide a pseudo-random
> > +  floating point number.
> > +
> >   
> >   Removed Items
> >   -------------
> > diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
> > index 4535cc980cec..3dc3484ee655 100644
> > --- a/lib/eal/common/rte_random.c
> > +++ b/lib/eal/common/rte_random.c
> > @@ -6,6 +6,9 @@
> >   #include <x86intrin.h>
> >   #endif
> >   #include <unistd.h>
> > +#ifdef RTE_LIBEAL_USE_IEEE754
> > +#include <ieee754.h>
> > +#endif
> >   
> >   #include <rte_branch_prediction.h>
> >   #include <rte_cycles.h>
> > @@ -173,6 +176,44 @@ rte_rand_max(uint64_t upper_bound)
> >     return res;
> >   }
> >   
> > +double
> > +rte_drand(void)
> > +{
> > +   struct rte_rand_state *state = __rte_rand_get_state();
> > +   uint64_t rand64 = __rte_rand_lfsr258(state);
> > +#ifdef RTE_LIBEAL_USE_IEEE754
> > +   union ieee754_double u = {
> > +           .ieee = {
> > +                   .negative = 0,
> > +                   .exponent = IEEE754_DOUBLE_BIAS,
> > +           },
> > +   };
> > +
> > +   /* Take 64 bit random value and put it into the mantissa
> > +    * This uses direct access to IEEE format to avoid doing
> > +    * any direct floating point math here.
> > +    */
> > +   u.ieee.mantissa0 = rand64 >> 32;
> > +   u.ieee.mantissa1 = rand64;
> > +
> > +   return u.d - 1.0;
> > +#else
> > +   /* Slower method requiring floating point divide
> > +    *  
> 
> Do you know how much slower? I ran rand_perf_test on two of my systems.
> 
>                        AMD 5900X     Pi4 (ARM Cortex-A72)
> IEEE754 version          12              1.19
> Non-IEEE754 version      11              1.16
> Naive version*           24              1.16
> 
> * (double)rte_rand() / (double)UINT64_MAX
> 
> Numbers are TSC cycles/op.

On AMD Ryzen 7, both versions take 9 cycles/op with the rand_perf_autotest.
So it is a toss-up.

The IEEE754 version compiles to:

        ubfx    r1, r1, #0, #20
        orr     r3, r1, #1069547520   << mantissa0
        mov     r2, r0
        orr     r3, r3, #3145728
        vmov.f64        d0, #1.0e+0
        vmov    d16, r2, r3
        vsub.f64        d0, d16, d0   << return u.d - 1.0

Note: the compiler is doing a smart optimization on the divide version.
It knows that since the denominator is a fixed value, it can use a multiply instead.

        vmov    d16, r0, r1
        vmul.f64        d0, d16, d0

Re: [PATCH v4 1/3] random: add rte_drand() function

Reply via email to