> From: Stephen Hemminger [mailto:step...@networkplumber.org]
> Sent: Wednesday, 6 September 2023 19.20
>
> Move the random number state into thread local storage.
> This has a several benefits.
> - no false cache sharing from cpu prefetching
> - fixes initialization of random state for non-DPDK threads
> - fixes unsafe usage of random state by non-DPDK threads
>
> The initialization of random number state is done by the
> lcore (lazy initialization).
>
> Signed-off-by: Stephen Hemminger <step...@networkplumber.org>
> ---
> lib/eal/common/rte_random.c | 38 +++++++++++++++++++------------------
> 1 file changed, 20 insertions(+), 18 deletions(-)
>
> diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
> index 53636331a27b..9657adf6ad3b 100644
> --- a/lib/eal/common/rte_random.c
> +++ b/lib/eal/common/rte_random.c
> @@ -19,13 +19,14 @@ struct rte_rand_state {
> uint64_t z3;
> uint64_t z4;
> uint64_t z5;
> -} __rte_cache_aligned;
> + uint64_t seed;
> +};
>
> -/* One instance each for every lcore id-equipped thread, and one
> - * additional instance to be shared by all others threads (i.e., all
> - * unregistered non-EAL threads).
> - */
> -static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
> +/* Global random seed */
> +static uint64_t rte_rand_seed;
> +
> +/* Per lcore random state. */
> +static RTE_DEFINE_PER_LCORE(struct rte_rand_state, rte_rand_state);
>
> static uint32_t
> __rte_rand_lcg32(uint32_t *seed)
> @@ -81,11 +82,7 @@ __rte_srand_lfsr258(uint64_t seed, struct rte_rand_state *state)
> void
> rte_srand(uint64_t seed)
> {
> - unsigned int lcore_id;
> -
> - /* add lcore_id to seed to avoid having the same sequence */
> - for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
> - __rte_srand_lfsr258(seed + lcore_id, &rand_states[lcore_id]);
> + __atomic_store_n(&rte_rand_seed, seed, __ATOMIC_RELAXED);
> }
>
> static __rte_always_inline uint64_t
> @@ -119,15 +116,18 @@ __rte_rand_lfsr258(struct rte_rand_state *state)
> static __rte_always_inline
> struct rte_rand_state *__rte_rand_get_state(void)
> {
> - unsigned int idx;
> + struct rte_rand_state *rand_state = &RTE_PER_LCORE(rte_rand_state);
> + uint64_t seed;
>
> - idx = rte_lcore_id();
> + seed = __atomic_load_n(&rte_rand_seed, __ATOMIC_RELAXED);
> + if (unlikely(seed != rand_state->seed)) {
Please note that rte_rand_seed lives in a completely different cache line than RTE_PER_LCORE(rte_rand_state), so the comparison with rte_rand_seed requires reading one more cache line than the original implementation, which only touches the cache line holding rand_states[idx]. And this is in the hot path.

If we could register a per-thread INIT function, the lazy initialization could be avoided entirely, and only one cache line would be accessed. Alternatively, simply replace "uint64_t seed" with "bool initialized" in the rte_rand_state structure, so the lazy init only needs to read rte_rand_seed when rand_state->initialized is false. A sketch of that variant follows below the quoted patch.

> + rand_state->seed = seed;
>
> - /* last instance reserved for unregistered non-EAL threads */
> - if (unlikely(idx == LCORE_ID_ANY))
> - idx = RTE_MAX_LCORE;
> + seed += rte_thread_self().opaque_id;
> + __rte_srand_lfsr258(seed, rand_state);
> + }
>
> - return &rand_states[idx];
> + return rand_state;
> }
>
> uint64_t
> @@ -227,7 +227,9 @@ RTE_INIT(rte_rand_init)
> {
> uint64_t seed;
>
> - seed = __rte_random_initial_seed();
> + do
> + seed = __rte_random_initial_seed();
> + while (seed == 0);
>
> rte_srand(seed);
> }
> --
> 2.39.2
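To make the "bool initialized" suggestion concrete, here is a rough, untested sketch against the patched lib/eal/common/rte_random.c. The field name "initialized", the struct member ordering and the likely()/unlikely() placement are my assumptions; everything else (rte_rand_seed, RTE_DEFINE_PER_LCORE, __rte_srand_lfsr258, rte_thread_self().opaque_id) is taken from the patch:

struct rte_rand_state {
        uint64_t z1;
        uint64_t z2;
        uint64_t z3;
        uint64_t z4;
        uint64_t z5;
        bool initialized;       /* replaces the per-thread copy of the seed */
};

/* Global random seed, as in the patch. */
static uint64_t rte_rand_seed;

/* Per lcore random state, as in the patch. */
static RTE_DEFINE_PER_LCORE(struct rte_rand_state, rte_rand_state);

static __rte_always_inline
struct rte_rand_state *__rte_rand_get_state(void)
{
        struct rte_rand_state *rand_state = &RTE_PER_LCORE(rte_rand_state);
        uint64_t seed;

        /* Hot path: only the thread-local cache line is touched. */
        if (likely(rand_state->initialized))
                return rand_state;

        /* Cold path, executed once per thread: read the global seed,
         * mix in the thread id, and seed this thread's state.
         */
        seed = __atomic_load_n(&rte_rand_seed, __ATOMIC_RELAXED);
        seed += rte_thread_self().opaque_id;
        __rte_srand_lfsr258(seed, rand_state);
        rand_state->initialized = true;

        return rand_state;
}

The obvious trade-off is that a later rte_srand() call would no longer re-seed states that are already initialized; whether that is acceptable depends on what semantics we want for rte_srand() after threads have started drawing numbers.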