The branch main has been updated by markj:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=9940c974029ba53fc00696b3fa1784725c48a9e9

commit 9940c974029ba53fc00696b3fa1784725c48a9e9
Author:     Mark Johnston <ma...@freebsd.org>
AuthorDate: 2025-07-07 15:02:36 +0000
Commit:     Mark Johnston <ma...@freebsd.org>
CommitDate: 2025-07-07 15:41:57 +0000

    random: Change the entropy harvest event queuing scheme
    
    The entropy queue stores entropy gathered from environmental sources.
    Periodically (every 100ms currently), the random kthread will drain this
    queue and mix it into the CSPRNG's entropy pool(s).
    
    The old scheme uses a ring buffer with a mutex to serialize producers,
    while the sole consumer, the random kthread, avoids using a mutex on the
    basis that no serialization is needed since nothing else is updating the
    consumer index.  On platforms without total store ordering, however,
    this isn't sufficient: when a producer inserts a queue entry and updates
    `ring.in`, there is no guarantee that the consumer will see the updated
    queue entry upon observing the updated producer index.  That is, the
    update to `ring.in` may be visible before the updated queue entry is
visible.  As a result, we could end up mixing in zeroed queue entries,
    though this race is fairly unlikely in practice given how infrequently
    the kthread runs.
    
    The easiest way to fix this is to make the kthread acquire the mutex as
    well, and hold it while processing queue entries.  However, this might
    result in a long hold time if there are many queue entries, and we
    really want the hold times to be short, e.g., to avoid delaying
    interrupt processing.
    
    We could introduce a proper MPSC queue, but this is probably
    overcomplicated for a consumer which runs at 10Hz.
    
    Instead, define two buffers, always with one designated as the "active"
    buffer.  Producers queue entries in the active buffer, and the kthread
    uses the mutex to atomically flip the two buffers, so it can process
    entries from the inactive buffer without holding the mutex.  This
    requires more memory, but keeps mutex hold times short and lets us keep
    the queue implementation very simple.
    
    Reviewed by:    cem
    MFC after:      1 month
    Sponsored by:   Stormshield
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D51112
---
 sys/dev/random/random_harvestq.c | 103 +++++++++++++++++----------------------
 1 file changed, 46 insertions(+), 57 deletions(-)

diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c
index ee37bda36496..395310b115fb 100644
--- a/sys/dev/random/random_harvestq.c
+++ b/sys/dev/random/random_harvestq.c
@@ -131,30 +131,14 @@ static struct harvest_context {
        /* The context of the kernel thread processing harvested entropy */
        struct proc *hc_kthread_proc;
        /*
-        * Lockless ring buffer holding entropy events
-        * If ring.in == ring.out,
-        *     the buffer is empty.
-        * If ring.in != ring.out,
-        *     the buffer contains harvested entropy.
-        * If (ring.in + 1) == ring.out (mod RANDOM_RING_MAX),
-        *     the buffer is full.
-        *
-        * NOTE: ring.in points to the last added element,
-        * and ring.out points to the last consumed element.
-        *
-        * The ring.in variable needs locking as there are multiple
-        * sources to the ring. Only the sources may change ring.in,
-        * but the consumer may examine it.
-        *
-        * The ring.out variable does not need locking as there is
-        * only one consumer. Only the consumer may change ring.out,
-        * but the sources may examine it.
+        * A pair of buffers for queued events.  New events are added to the
+        * active queue while the kthread processes the other one in parallel.
         */
-       struct entropy_ring {
+       struct entropy_buffer {
                struct harvest_event ring[RANDOM_RING_MAX];
-               volatile u_int in;
-               volatile u_int out;
-       } hc_entropy_ring;
+               u_int pos;
+       } hc_entropy_buf[2];
+       u_int hc_active_buf;
        struct fast_entropy_accumulator {
                volatile u_int pos;
                uint32_t buf[RANDOM_ACCUM_MAX];
@@ -183,37 +167,41 @@ random_harvestq_fast_process_event(struct harvest_event 
*event)
 static void
 random_kthread(void)
 {
-        u_int maxloop, ring_out, i;
+       struct harvest_context *hc;
 
-       /*
-        * Locking is not needed as this is the only place we modify ring.out, 
and
-        * we only examine ring.in without changing it. Both of these are 
volatile,
-        * and this is a unique thread.
-        */
+       hc = &harvest_context;
        for (random_kthread_control = 1; random_kthread_control;) {
-               /* Deal with events, if any. Restrict the number we do in one 
go. */
-               maxloop = RANDOM_RING_MAX;
-               while (harvest_context.hc_entropy_ring.out != 
harvest_context.hc_entropy_ring.in) {
-                       ring_out = (harvest_context.hc_entropy_ring.out + 
1)%RANDOM_RING_MAX;
-                       
random_harvestq_fast_process_event(harvest_context.hc_entropy_ring.ring + 
ring_out);
-                       harvest_context.hc_entropy_ring.out = ring_out;
-                       if (!--maxloop)
-                               break;
-               }
+               struct entropy_buffer *buf;
+               u_int entries;
+
+               /* Deal with queued events. */
+               RANDOM_HARVEST_LOCK();
+               buf = &hc->hc_entropy_buf[hc->hc_active_buf];
+               entries = buf->pos;
+               buf->pos = 0;
+               hc->hc_active_buf = (hc->hc_active_buf + 1) %
+                   nitems(hc->hc_entropy_buf);
+               RANDOM_HARVEST_UNLOCK();
+               for (u_int i = 0; i < entries; i++)
+                       random_harvestq_fast_process_event(&buf->ring[i]);
+
+               /* Poll sources of noise. */
                random_sources_feed();
+
                /* XXX: FIX!! Increase the high-performance data rate? Need 
some measurements first. */
-               for (i = 0; i < RANDOM_ACCUM_MAX; i++) {
-                       if (harvest_context.hc_entropy_fast_accumulator.buf[i]) 
{
-                               
random_harvest_direct(harvest_context.hc_entropy_fast_accumulator.buf + i, 
sizeof(harvest_context.hc_entropy_fast_accumulator.buf[0]), RANDOM_UMA);
-                               
harvest_context.hc_entropy_fast_accumulator.buf[i] = 0;
+               for (u_int i = 0; i < RANDOM_ACCUM_MAX; i++) {
+                       if (hc->hc_entropy_fast_accumulator.buf[i]) {
+                               
random_harvest_direct(&hc->hc_entropy_fast_accumulator.buf[i],
+                                   
sizeof(hc->hc_entropy_fast_accumulator.buf[0]), RANDOM_UMA);
+                               hc->hc_entropy_fast_accumulator.buf[i] = 0;
                        }
                }
                /* XXX: FIX!! This is a *great* place to pass hardware/live 
entropy to random(9) */
-               tsleep_sbt(&harvest_context.hc_kthread_proc, 0, "-",
+               tsleep_sbt(&hc->hc_kthread_proc, 0, "-",
                    SBT_1S/RANDOM_KTHREAD_HZ, 0, C_PREL(1));
        }
        random_kthread_control = -1;
-       wakeup(&harvest_context.hc_kthread_proc);
+       wakeup(&hc->hc_kthread_proc);
        kproc_exit(0);
        /* NOTREACHED */
 }
@@ -435,7 +423,7 @@ random_harvestq_init(void *unused __unused)
 
        hc_source_mask = almost_everything_mask;
        RANDOM_HARVEST_INIT_LOCK();
-       harvest_context.hc_entropy_ring.in = 
harvest_context.hc_entropy_ring.out = 0;
+       harvest_context.hc_active_buf = 0;
 }
 SYSINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, 
random_harvestq_init, NULL);
 
@@ -540,9 +528,9 @@ SYSUNINIT(random_device_h_init, SI_SUB_RANDOM, 
SI_ORDER_THIRD, random_harvestq_d
  * This is supposed to be fast; do not do anything slow in here!
  * It is also illegal (and morally reprehensible) to insert any
  * high-rate data here. "High-rate" is defined as a data source
- * that will usually cause lots of failures of the "Lockless read"
- * check a few lines below. This includes the "always-on" sources
- * like the Intel "rdrand" or the VIA Nehamiah "xstore" sources.
+ * that is likely to fill up the buffer in much less than 100ms.
+ * This includes the "always-on" sources like the Intel "rdrand"
+ * or the VIA Nehamiah "xstore" sources.
  */
 /* XXXRW: get_cyclecount() is cheap on most modern hardware, where cycle
  * counters are built in, but on older hardware it will do a real time clock
@@ -551,28 +539,29 @@ SYSUNINIT(random_device_h_init, SI_SUB_RANDOM, 
SI_ORDER_THIRD, random_harvestq_d
 void
 random_harvest_queue_(const void *entropy, u_int size, enum 
random_entropy_source origin)
 {
+       struct harvest_context *hc;
+       struct entropy_buffer *buf;
        struct harvest_event *event;
-       u_int ring_in;
 
-       KASSERT(origin >= RANDOM_START && origin < ENTROPYSOURCE, ("%s: origin 
%d invalid\n", __func__, origin));
+       KASSERT(origin >= RANDOM_START && origin < ENTROPYSOURCE,
+           ("%s: origin %d invalid", __func__, origin));
+
+       hc = &harvest_context;
        RANDOM_HARVEST_LOCK();
-       ring_in = (harvest_context.hc_entropy_ring.in + 1)%RANDOM_RING_MAX;
-       if (ring_in != harvest_context.hc_entropy_ring.out) {
-               /* The ring is not full */
-               event = harvest_context.hc_entropy_ring.ring + ring_in;
+       buf = &hc->hc_entropy_buf[hc->hc_active_buf];
+       if (buf->pos < RANDOM_RING_MAX) {
+               event = &buf->ring[buf->pos++];
                event->he_somecounter = random_get_cyclecount();
                event->he_source = origin;
-               event->he_destination = 
harvest_context.hc_destination[origin]++;
+               event->he_destination = hc->hc_destination[origin]++;
                if (size <= sizeof(event->he_entropy)) {
                        event->he_size = size;
                        memcpy(event->he_entropy, entropy, size);
-               }
-               else {
+               } else {
                        /* Big event, so squash it */
                        event->he_size = sizeof(event->he_entropy[0]);
                        event->he_entropy[0] = jenkins_hash(entropy, size, 
(uint32_t)(uintptr_t)event);
                }
-               harvest_context.hc_entropy_ring.in = ring_in;
        }
        RANDOM_HARVEST_UNLOCK();
 }

Reply via email to