Linus, George, Florian,

would something in this vein be OK in your opinion ?

- update_process_times still updates the noise
- we don't touch the fast_pool anymore
- we don't read any TSC on hot paths
- we update the noise in xmit from jiffies and a few pointer values instead

I've applied it on top of George's patch rebased to mainline for simplicity.
I've used a separate per_cpu noise variable to keep the net_rand_state static
with its __latent_entropy.

With this I'm even getting very slightly better performance than with
the previous patch (195.7 - 197.8k cps).

What could be improved is the way the input values are mixed (just
added hence commutative for now). I didn't want to call a siphash
round on the hot paths, but just shifting the previous noise value
before adding would work, such as the following for example:

  void prandom_u32_add_noise(a, b, c, d)
  {
        unsigned long *noise = get_cpu_ptr(&net_rand_noise);

  #if BITS_PER_LONG == 64
        *noise = rol64(*noise, 7) + a + b + c + d;
  #else
        *noise = rol32(*noise, 7) + a + b + c + d;
  #endif
        put_cpu_ptr(&net_rand_noise);

  }

Thanks,
Willy

---


diff --git a/drivers/char/random.c b/drivers/char/random.c
index d20ba1b104ca..2a41b21623ae 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1277,7 +1277,6 @@ void add_interrupt_randomness(int irq, int irq_flags)
 
        fast_mix(fast_pool);
        add_interrupt_bench(cycles);
-       this_cpu_add(net_rand_state.s1, fast_pool->pool[cycles & 3]);
 
        if (unlikely(crng_init == 0)) {
                if ((fast_pool->count >= 64) &&
diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index aa16e6468f91..e2b4990f2126 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -20,7 +20,7 @@ struct rnd_state {
        __u32 s1, s2, s3, s4;
 };
 
-DECLARE_PER_CPU(struct rnd_state, net_rand_state);
+DECLARE_PER_CPU(unsigned long, net_rand_noise);
 
 u32 prandom_u32_state(struct rnd_state *state);
 void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes);
@@ -67,6 +67,7 @@ static inline void prandom_seed_state(struct rnd_state 
*state, u64 seed)
        state->s2 = __seed(i,   8U);
        state->s3 = __seed(i,  16U);
        state->s4 = __seed(i, 128U);
+       __this_cpu_add(net_rand_noise, i);
 }
 
 /* Pseudo random number generator from numerical recipes. */
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index ae5029f984a8..2f07c569af77 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1721,7 +1721,7 @@ void update_process_times(int user_tick)
         * non-constant value that's not affine to the number of calls to make
         * sure it's updated when there's some activity (we don't care in idle).
         */
-       this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick);
+       __this_cpu_add(net_rand_noise, rol32(jiffies, 24) + user_tick);
 }
 
 /**
diff --git a/lib/random32.c b/lib/random32.c
index 2b048e2ea99f..d74cf1db8cc9 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -320,6 +320,8 @@ struct siprand_state {
 };
 
 static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy;
+DEFINE_PER_CPU(unsigned long, net_rand_noise);
+EXPORT_SYMBOL(net_rand_noise);
 
 #if BITS_PER_LONG == 64
 /*
@@ -334,7 +336,7 @@ static DEFINE_PER_CPU(struct siprand_state, net_rand_state) 
__latent_entropy;
 #define K0 (0x736f6d6570736575 ^ 0x6c7967656e657261 )
 #define K1 (0x646f72616e646f6d ^ 0x7465646279746573 )
 
-#elif BITS_PER_LONG == 23
+#elif BITS_PER_LONG == 32
 /*
  * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash.
  * This is weaker, but 32-bit machines are not used for high-traffic
@@ -374,9 +376,12 @@ static DEFINE_PER_CPU(struct siprand_state, 
net_rand_state) __latent_entropy;
 static u32 siprand_u32(struct siprand_state *s)
 {
        unsigned long v0 = s->v[0], v1 = s->v[1], v2 = s->v[2], v3 = s->v[3];
+       unsigned long n = __this_cpu_read(net_rand_noise);
 
+       v3 ^= n;
        SIPROUND(v0, v1, v2, v3);
        SIPROUND(v0, v1, v2, v3);
+       v0 ^= n;
        s->v[0] = v0;  s->v[1] = v1;  s->v[2] = v2;  s->v[3] = v3;
        return v1 + v3;
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 7df6c9617321..55a2471cd75b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -144,6 +144,7 @@
 #include <linux/indirect_call_wrapper.h>
 #include <net/devlink.h>
 #include <linux/pm_runtime.h>
+#include <linux/prandom.h>
 
 #include "net-sysfs.h"
 
@@ -3557,6 +3558,7 @@ static int xmit_one(struct sk_buff *skb, struct 
net_device *dev,
                dev_queue_xmit_nit(skb, dev);
 
        len = skb->len;
+       __this_cpu_add(net_rand_noise, (long)skb + (long)dev + (long)txq + len 
+ jiffies);
        trace_net_dev_start_xmit(skb, dev);
        rc = netdev_start_xmit(skb, dev, txq, more);
        trace_net_dev_xmit(skb, rc, dev, len);
@@ -4129,6 +4131,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct 
net_device *sb_dev)
                        if (!skb)
                                goto out;
 
+                       __this_cpu_add(net_rand_noise, (long)skb + (long)dev + 
(long)txq + jiffies);
                        HARD_TX_LOCK(dev, txq, cpu);
 
                        if (!netif_xmit_stopped(txq)) {
@@ -4194,6 +4197,7 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
 
        skb_set_queue_mapping(skb, queue_id);
        txq = skb_get_tx_queue(dev, skb);
+       __this_cpu_add(net_rand_noise, (long)skb + (long)dev + (long)txq + 
jiffies);
 
        local_bh_disable();
 

Reply via email to