The branch main has been updated by fuz:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=4a0fc138e5eb343e45388e66698a4765b308a622

commit 4a0fc138e5eb343e45388e66698a4765b308a622
Author:     Robert Clausecker <f...@freebsd.org>
AuthorDate: 2024-11-27 17:37:58 +0000
Commit:     Robert Clausecker <f...@freebsd.org>
CommitDate: 2024-12-02 10:41:11 +0000

    tools: add arc4random_uniform() bias test
    
    This test program executed arc4random_uniform() repeatedly and
    analyzes the distribution of return values, showing how similar
    the parameters of the observed outcome are to the expected
    parameters of an equidistribution.
    
    This cannot be a unit test as it takes quite a while to run and
    lots of memory (~3 GB) to execute.
    
    Reviewed by:    cem
    Approved by:    emaste
    Differential Revision:  https://reviews.freebsd.org/D47659
---
 tools/test/README                |   2 +-
 tools/test/arc4random/biastest.c | 216 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 217 insertions(+), 1 deletion(-)

diff --git a/tools/test/README b/tools/test/README
index a54964621cd8..894aa051c6c1 100644
--- a/tools/test/README
+++ b/tools/test/README
@@ -1,4 +1,3 @@
-
 This directory is for standalone test programs.  For the FreeBSD
 Test Suite, which uses Kyua, please see /usr/src/tests/
 
@@ -7,6 +6,7 @@ and either tries to break it or measures its performance.
 
 Please make a subdir per program, and add a brief description to this file.
 
+arc4random     Bias test for arc4random_uniform()
 auxinfo                Return information on page sizes, CPUs, and OS release 
date.
 devrandom      Programs to test /dev/*random.
 gpioevents     Test delivery of gpio pin-change events to userland.
diff --git a/tools/test/arc4random/biastest.c b/tools/test/arc4random/biastest.c
new file mode 100644
index 000000000000..2fd4363b7869
--- /dev/null
+++ b/tools/test/arc4random/biastest.c
@@ -0,0 +1,216 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Robert Clausecker <f...@freebsd.org>
+ *
+ * biastest.c -- bias test for arc4random_uniform().
+ *
+ * The default configuration of this test has an upper bound of
+ * (3/4) * UINT32_MAX, which should give a high amount of bias in
+ * an incorrect implementation.  If the range reduction is
+ * implemented correctly, the parameters of the statistic should
+ * closely match the expected values.  If not, they'll differ.
+ *
+ * For memory usage reasons, we use an uchar to track the number of
+ * observations per bucket.  If the number of tries is much larger
+ * than upper_bound, the buckets likely overflow.  This is detected
+ * by the test, but will lead to incorrect results.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+#include <signal.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static void    collect_sample(unsigned char *, long long, uint32_t);
+static void    analyze_sample(const unsigned char *, long long, uint32_t);
+
+static atomic_bool complete = false;
+static long long tries = 5ULL << 32;
+static atomic_llong tries_done = 0;
+
+static void
+usage(const char *argv0)
+{
+       fprintf(stderr, "usage: %s [-n tries] [-t upper_bound]\n", argv0);
+       exit(EXIT_FAILURE);
+}
+
+int
+main(int argc, char *argv[])
+{
+       uint32_t threshold = 3UL << 30;
+       int ch;
+       unsigned char *sample;
+
+       while (ch = getopt(argc, argv, "n:t:"), ch != EOF)
+               switch (ch) {
+               case 'n':
+                       tries = atoll(optarg);
+                       break;
+
+               case 't':
+                       threshold = (uint32_t)atoll(optarg);
+                       break;
+
+               default:
+                       usage(argv[0]);
+               }
+
+       if (optind != argc)
+               usage(argv[0]);
+
+       if (threshold == 0) {
+               fprintf(stderr, "threshold must be between 1 and %lu\n", 
(unsigned long)UINT32_MAX);
+               exit(EXIT_FAILURE);
+       }
+
+       sample = calloc(threshold, 1);
+       if (sample == NULL) {
+               perror("calloc(threshold, 1)");
+               return (EXIT_FAILURE);
+       }
+
+       collect_sample(sample, tries, threshold);
+       analyze_sample(sample, tries, threshold);
+}
+
+static void
+progress(int signo)
+{
+       (void)signo;
+
+       if (!complete) {
+               fprintf(stderr, "\r%10lld of %10lld samples taken (%5.2f%% 
done)",
+                   tries_done, tries, (tries_done * 100.0) / tries);
+
+               signal(SIGALRM, progress);
+               alarm(1);
+       }
+}
+
+static void
+collect_sample(unsigned char *sample, long long tries, uint32_t threshold)
+{
+       long long i;
+       uint32_t x;
+       bool overflowed = false;
+
+       progress(SIGALRM);
+
+       for (i = 0; i < tries; i++) {
+               x = arc4random_uniform(threshold);
+               tries_done++;
+               assert(x < threshold);
+
+               if (sample[x] == UCHAR_MAX) {
+                       if (!overflowed) {
+                               printf("sample table overflow, results will be 
incorrect\n");
+                               overflowed = true;
+                       }
+               } else
+                       sample[x]++;
+       }
+
+       progress(SIGALRM);
+       complete = true;
+       fputc('\n', stderr);
+}
+
+static void
+analyze_sample(const unsigned char *sample, long long tries,  uint32_t 
threshold)
+{
+       double discrepancy, average, variance, total;
+       long long histogram[UCHAR_MAX + 1] = { 0 }, sum, n, median;
+       uint32_t i, i_min, i_max;
+       int min, max;
+
+       printf("distribution properties:\n");
+
+       /* find median, average, deviation, smallest, and largest bucket */
+       total = 0.0;
+       for (i = 0; i < threshold; i++) {
+               histogram[sample[i]]++;
+               total += (double)i * sample[i];
+       }
+
+       average = total / tries;
+
+       variance = 0.0;
+       median = threshold;
+       n = 0;
+       i_min = 0;
+       i_max = 0;
+       min = sample[i_min];
+       max = sample[i_max];
+
+       for (i = 0; i < threshold; i++) {
+               discrepancy = i - average;
+               variance += sample[i] * discrepancy * discrepancy;
+
+               n += sample[i];
+               if (median == threshold && n > tries / 2)
+                       median = i;
+
+               if (sample[i] < min) {
+                       i_min = i;
+                       min = sample[i_min];
+               } else if (sample[i] > max) {
+                       i_max = i;
+                       max = sample[i_max];
+               }
+       }
+
+       variance /= tries;
+       assert(median < threshold);
+
+       printf("\tthreshold:    %lu\n", (unsigned long)threshold);
+       printf("\tobservations: %lld\n", tries);
+       printf("\tleast common: %lu (%d observations)\n", (unsigned long)i_min, 
min);
+       printf("\tmost common:  %lu (%d observations)\n", (unsigned long)i_max, 
max);
+       printf("\tmedian:               %lld (expected %lu)\n", median, 
(unsigned long)threshold / 2);
+       printf("\taverage:      %f (expected %f)\n", average, 0.5 * (threshold 
- 1));
+       printf("\tdeviation:    %f (expected %f)\n\n", sqrt(variance),
+           sqrt(((double)threshold * threshold - 1.0) / 12));
+
+       /* build histogram and analyze it */
+       printf("sample properties:\n");
+
+       /* find median, average, and deviation */
+       average = (double)tries / threshold;
+
+       variance = 0.0;
+       for (i = 0; i < UCHAR_MAX; i++) {
+               discrepancy = i - average;
+               variance += histogram[i] * discrepancy * discrepancy;
+       }
+
+       variance /= threshold;
+
+       n = 0;
+       median = UCHAR_MAX + 1;
+       for (i = 0; i <= UCHAR_MAX; i++) {
+               n += histogram[i];
+               if (n >= threshold / 2) {
+                       median = i;
+                       break;
+               }
+       }
+
+       assert(median <= UCHAR_MAX); /* unreachable */
+
+       printf("\tmedian:               %lld\n", median);
+       printf("\taverage:      %f\n", average);
+       printf("\tdeviation:    %f (expected %f)\n\n", sqrt(variance), 
sqrt(average * (1.0 - 1.0 / threshold)));
+
+       printf("histogram:\n");
+       for (i = 0; i < 256; i++)
+               if (histogram[i] != 0)
+                       printf("\t%3d:\t%lld\n", (int)i, histogram[i]);
+}

Reply via email to