On 2022-05-25 22:31, Stephen Hemminger wrote:
The PIE code and other applications can benefit from having a
fast way to get a random floating point value. This new function
is equivalent to drand48() in the standard library.
Signed-off-by: Stephen Hemminger <step...@networkplumber.org>
---
app/test/test_rand_perf.c | 7 +++++
doc/guides/rel_notes/release_22_07.rst | 5 ++++
lib/eal/common/rte_random.c | 41 ++++++++++++++++++++++++++
lib/eal/include/rte_random.h | 18 +++++++++++
lib/eal/meson.build | 3 ++
lib/eal/version.map | 1 +
6 files changed, 75 insertions(+)
diff --git a/app/test/test_rand_perf.c b/app/test/test_rand_perf.c
index fe797ebfa1ca..26fb1d9a586e 100644
--- a/app/test/test_rand_perf.c
+++ b/app/test/test_rand_perf.c
@@ -20,6 +20,7 @@ static volatile uint64_t vsum;
enum rand_type {
rand_type_64,
+ rand_type_float,
rand_type_bounded_best_case,
rand_type_bounded_worst_case
};
@@ -30,6 +31,8 @@ rand_type_desc(enum rand_type rand_type)
switch (rand_type) {
case rand_type_64:
return "Full 64-bit [rte_rand()]";
+ case rand_type_float:
+ return "Floating point [rte_drand()]";
case rand_type_bounded_best_case:
return "Bounded average best-case [rte_rand_max()]";
case rand_type_bounded_worst_case:
@@ -55,6 +58,9 @@ test_rand_perf_type(enum rand_type rand_type)
case rand_type_64:
sum += rte_rand();
break;
+ case rand_type_float:
+ sum += 1000. * rte_drand();
Including this floating point multiplication will lead to an
overestimation of rte_drand() latency.
You could refactor this function to be a macro, and pass the return type
as a parameter to this macro. I did just that, and on both an AMD
5900X and a Cortex-A72 the multiplication didn't add more than ~5%, so I
don't think it's necessary.
+ break;
case rand_type_bounded_best_case:
sum += rte_rand_max(BEST_CASE_BOUND);
break;
@@ -83,6 +89,7 @@ test_rand_perf(void)
printf("Pseudo-random number generation latencies:\n");
test_rand_perf_type(rand_type_64);
+ test_rand_perf_type(rand_type_float);
test_rand_perf_type(rand_type_bounded_best_case);
test_rand_perf_type(rand_type_bounded_worst_case);
diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst
index e49cacecefd4..b131ea577226 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -104,6 +104,11 @@ New Features
* ``RTE_EVENT_QUEUE_ATTR_WEIGHT``
* ``RTE_EVENT_QUEUE_ATTR_AFFINITY``
+* **Added function to get a random floating point number.**
+
+ Added the function ``rte_drand()`` to provide a pseudo-random
+ floating point number.
+
Removed Items
-------------
diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
index 4535cc980cec..3dc3484ee655 100644
--- a/lib/eal/common/rte_random.c
+++ b/lib/eal/common/rte_random.c
@@ -6,6 +6,9 @@
#include <x86intrin.h>
#endif
#include <unistd.h>
+#ifdef RTE_LIBEAL_USE_IEEE754
+#include <ieee754.h>
+#endif
#include <rte_branch_prediction.h>
#include <rte_cycles.h>
@@ -173,6 +176,44 @@ rte_rand_max(uint64_t upper_bound)
return res;
}
+double
+rte_drand(void)
+{
+ struct rte_rand_state *state = __rte_rand_get_state();
+ uint64_t rand64 = __rte_rand_lfsr258(state);
+#ifdef RTE_LIBEAL_USE_IEEE754
+ union ieee754_double u = {
+ .ieee = {
+ .negative = 0,
+ .exponent = IEEE754_DOUBLE_BIAS,
+ },
+ };
+
+ /* Take 64 bit random value and put it into the mantissa
+ * This uses direct access to IEEE format to avoid doing
+ * any direct floating point math here.
+ */
+ u.ieee.mantissa0 = rand64 >> 32;
+ u.ieee.mantissa1 = rand64;
+
+ return u.d - 1.0;
+#else
+ /* Slower method requiring floating point divide
+ *
Do you know how much slower? I ran rand_perf_test on two of my systems.
                      AMD 5900X   Pi4 (ARM Cortex-A72)
IEEE754 version       12          1.19
Non-IEEE754 version   11          1.16
Naive version*        24          1.16
* (double)rte_rand() / (double)UINT64_MAX
Numbers are TSC cycles/op.
Surprisingly, it seems like the IEEE754 version is slower on both of
these machines.
Do you have a machine (or a different use case) where the supposedly
more optimized version actually runs faster?
+ * The double mantissa only has 53 bits, so we uniformly mask off the
+ * high 11 bits and then floating-point divide by 2^53 to achieve a
+ * result in [0, 1).
+ *
+ * We are not allowed to emit 1.0, so denom must be one greater than
+ * the possible range of the preceding step.
+ */
+ static const uint64_t denom = (uint64_t)1 << 53;
Remove "static const". Surely, this can't make a difference (at least
not in a positive direction).
+
+ rand64 &= denom - 1;
+ return (double)rand64 / denom;
+#endif
+}
+
static uint64_t
__rte_random_initial_seed(void)
{
diff --git a/lib/eal/include/rte_random.h b/lib/eal/include/rte_random.h
index 29f5f1325a30..f6541c2b0f08 100644
--- a/lib/eal/include/rte_random.h
+++ b/lib/eal/include/rte_random.h
@@ -65,6 +65,24 @@ rte_rand(void);
uint64_t
rte_rand_max(uint64_t upper_bound);
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Generates a pseudo-random floating point number.
+ *
+ * This function returns a nonnegative double-precision floating-point random
+ * number uniformly distributed over the interval [0.0, 1.0).
+ *
+ * The generator is not cryptographically secure.
+ * If called from lcore threads, this function is thread-safe.
+ *
+ * @return
+ * A pseudo-random value between 0 and 1.0.
+ */
+__rte_experimental
+double rte_drand(void);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/eal/meson.build b/lib/eal/meson.build
index 056beb946119..e50524901c98 100644
--- a/lib/eal/meson.build
+++ b/lib/eal/meson.build
@@ -32,3 +32,6 @@ endif
if cc.has_function('getentropy', prefix : '#include <unistd.h>')
cflags += '-DRTE_LIBEAL_USE_GETENTROPY'
endif
+if cc.has_header_symbol('ieee754.h', 'union ieee754_double')
+ cflags += '-DRTE_LIBEAL_USE_IEEE754'
+endif
diff --git a/lib/eal/version.map b/lib/eal/version.map
index d49e30bd042f..cfbade9a33e9 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -422,6 +422,7 @@ EXPERIMENTAL {
rte_intr_type_set;
# added in 22.07
+ rte_drand;
rte_thread_get_affinity_by_id;
rte_thread_self;
rte_thread_set_affinity_by_id;