surf() takes 18256 cycles to generate u32[8*2] and needs ~290 opcodes.

chacha8() either takes 4086 cycles to generate same amount of entropy
with just ~100 opcodes, or 1944 cycles with 320 opcodes depending
on QUARTERROUND() inlining.

`gcc-13.2.0 -Os -mips32r2 -mtune=24kc -mips16` gives following numbers
for MediaTek MT7620N running at 580MHz:

rand16: 1.85 -> 0.294 us/call, 6.3x speedup
rand32: 1.85 -> 0.520 us/call, 3.5x speedup
rand64: 3.60 -> 0.959 us/call, 3.7x speedup
---
 Makefile      |   1 +
 src/charand.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/charand.h |  42 +++++++++++
 src/dnsmasq.h |   9 ++-
 src/util.c    | 117 +++---------------------------
 5 files changed, 250 insertions(+), 112 deletions(-)
 create mode 100644 src/charand.c
 create mode 100644 src/charand.h

diff --git Makefile Makefile
index a9bcab90..b78f03ae 100644
--- Makefile
+++ Makefile
@@ -85,6 +85,7 @@ objs = cache.o rfc1035.o util.o option.o forward.o network.o \
        dhcp-common.o outpacket.o radv.o slaac.o auth.o ipset.o pattern.o \
        domain.o dnssec.o blockdata.o tables.o loop.o inotify.o \
        poll.o rrfilter.o edns0.o arp.o crypto.o dump.o ubus.o \
+       charand.o \
        metrics.o hash-questions.o domain-match.o nftset.o
 
 hdrs = dnsmasq.h config.h dhcp-protocol.h dhcp6-protocol.h \
diff --git src/charand.c src/charand.c
new file mode 100644
index 00000000..a2eed576
--- /dev/null
+++ src/charand.c
@@ -0,0 +1,193 @@
+/* dnsmasq is Copyright (c) 2000-2024 Simon Kelley
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 dated June, 1991, or
+   (at your option) version 3 dated 29 June, 2007.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include "config.h"
+#include "charand.h"
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <limits.h>
+#include <string.h>
+#include <time.h>
+#include <sys/auxv.h>
+#if !defined(HAVE_GETENTROPY) && defined(RANDFILE)
+# include <stdio.h>
+#endif
+
+// ChaCha8-based random number generator. That's NOT Golang's chacha8rand
+// described at https://github.com/C2SP/C2SP/blob/main/chacha8rand.md
+// as the most popular OpenWRT boxes are not that rich to have SIMD :-)
+
+// Non-standard libc getentropy() might use getrandom() avoiding filesystem 
access, that's great
+// for jails and chroots.  However, a fallback implemetation is required for 
older systems that have
+// no getentropy() in libc.  Also, getentropy() might block if the kernel has 
not initialized random
+// pool yet.  However, dnsmasq is never started that early during the OpenWRT 
boot process (at least).
+
+#if !defined(HAVE_GETENTROPY) && defined(RANDFILE)
+
+static int getentropy_fallback(void *buffer, size_t length)
+{
+  FILE *fd = fopen(RANDFILE, "r")
+  if (!fd)
+    return -1;
+  const size_t done = fread(fd, buffer, 1, length);
+  fclose(fd);
+  return done == length ? 0 : -1;
+}
+
+#define getentropy(a, b) getentropy_fallback(a, b)
+
+#endif // !defined(HAVE_GETENTROPY)
+
+static void chacha8(void);
+
+static uint32_t CHA[16] = { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 };
+static uint32_t CHAO[16];
+static unsigned int CHAOBYEs = 0;
+
+static void nonce_time(void) { CHA[14] = (uint32_t)time(NULL); };
+static void nonce_proc(void) { CHA[15] = getpid(); };
+
+int charand_init(void)
+{
+  if (getentropy(CHA + 4, 8 * sizeof(uint32_t)))
+    return -1;
+  // Overflowing 32-bit counter is trivial, overflowing 64-bit counter takes 
146 years at 4 GHz rate.
+  // So, counter is never reset during the process lifetime.
+  CHA[12] = CHA[13] = 0;
+  nonce_time();
+  nonce_proc();
+  chacha8();
+  return 0;
+}
+
+bool charand_isinit(void)
+{
+  return CHA[15] != 0;
+}
+
+void charand_rekey(void)
+{
+  if (CHAOBYEs < 32)
+    chacha8();
+  for (int i = 0; i < 8; i++)
+    CHA[4+i] ^= CHAO[i];
+  nonce_time();
+  chacha8();
+}
+
+// This atfork handler avoids getentropy() call to preserve system entropy 
pool.
+void charand_atfork_child(void)
+{
+  const uint32_t *r32x4 =
+#ifdef AT_RANDOM
+    (const uint32_t*)getauxval(AT_RANDOM); // Use the entropy OS provides. 
Hopefully, ptr is aligned :-)
+#else
+#   warning No getauxval(AT_RANDOM) available
+    NULL;
+#endif
+  if (r32x4)
+    for (unsigned int i = 0; i < 16 / sizeof(uint32_t); i++)
+      CHA[4+i] ^= r32x4[i]; // scramble 256-bit key with 128-bit process-wide 
random
+  nonce_proc();
+  chacha8();
+  charand_rekey();
+}
+
+static inline uint32_t rotl32(uint32_t x, unsigned b) { return (x << b) | (x 
>> (32 - b)); }
+
+#define QUARTERROUND(a, b, c, d) do { \
+    CHAO[a] += CHAO[b]; CHAO[d] ^= CHAO[a]; CHAO[d] = rotl32(CHAO[d], 16); \
+    CHAO[c] += CHAO[d]; CHAO[b] ^= CHAO[c]; CHAO[b] = rotl32(CHAO[b], 12); \
+    CHAO[a] += CHAO[b]; CHAO[d] ^= CHAO[a]; CHAO[d] = rotl32(CHAO[d],  8); \
+    CHAO[c] += CHAO[d]; CHAO[b] ^= CHAO[c]; CHAO[b] = rotl32(CHAO[b],  7); \
+} while (0)
+
+// These are two options for the ChaCha8 quarter-round implementation of 
different size and speed.
+// There is also a obvious middleground:
+//  static void chaqround(int a, int b, int c, int d) { QUARTERROUND(a, b, c, 
d); }
+// But it takes 3654 cycles and 130 opcodes, so it's kinda pointless.
+
+#ifdef __OPTIMIZE_SIZE__
+
+// 4086 cycles, 100 opcodes for `gcc-13.2.0 -Os -mips32r2 -mtune=24kc -mips16`
+# define quarterround(a, b, c, d) quarterround_1arg((a) | ((b) << 4) | ((c) << 
8) | ((d) << 12))
+
+static void quarterround_1arg(unsigned ndx)
+{
+  const unsigned a = 0x0F & ndx;
+  const unsigned b = 0x0F & (ndx >> 4);
+  const unsigned c = 0x0F & (ndx >> 8);
+  const unsigned d = ndx >> 12;
+  QUARTERROUND(a, b, c, d);
+}
+
+#else
+
+// 1944 cycles, 320 opcodes for the same MIPS32r2 machine
+# define quarterround(a, b, c, d) QUARTERROUND(a, b, c, d)
+
+#endif // __OPTIMIZE_SIZE__
+
+static void chacha8(void)
+{
+  CHA[12]++;
+  CHA[13] += !CHA[12];
+
+  memcpy(CHAO, CHA, sizeof(CHAO));
+
+  for (unsigned i = 4; i; i--) {
+      quarterround(0, 4,  8, 12);
+      quarterround(1, 5,  9, 13);
+      quarterround(2, 6, 10, 14);
+      quarterround(3, 7, 11, 15);
+      quarterround(0, 5, 10, 15);
+      quarterround(1, 6, 11, 12);
+      quarterround(2, 7,  8, 13);
+      quarterround(3, 4,  9, 14);
+  }
+
+  for (unsigned i = 0; i < 16; i++)
+    CHAO[i] += CHA[i];
+
+  CHAOBYEs = sizeof(CHAO);
+}
+
+// rand16() is the most popular in rand*() functions family, it is called once 
per ≈ every DNS request,
+// so it should not waste generated entropy.
+// TODO: count real-world stats for an OpenWRT box.
+uint16_t charand16(void)
+{
+  // CHAOBYEs is always aligned at uint16_t boundary
+  if (!CHAOBYEs) chacha8();
+  CHAOBYEs -= sizeof(uint16_t);
+  return *(uint16_t*)(((uint8_t*)CHAO) + CHAOBYEs);
+}
+
+uint32_t charand32(void)
+{
+  CHAOBYEs &= (UINT_MAX - 3); // skip some to make read aligned
+  if (!CHAOBYEs) chacha8();
+  CHAOBYEs -= sizeof(uint32_t);
+  return *(uint32_t*)(((uint8_t*)CHAO) + CHAOBYEs);
+}
+
+uint64_t charand64(void)
+{
+  CHAOBYEs &= (UINT_MAX - 7); // skip some to make read aligned
+  if (!CHAOBYEs) chacha8();
+  CHAOBYEs -= sizeof(uint64_t);
+  return *(uint64_t*)(((uint8_t*)CHAO) + CHAOBYEs);
+}
diff --git src/charand.h src/charand.h
new file mode 100644
index 00000000..a831426a
--- /dev/null
+++ src/charand.h
@@ -0,0 +1,42 @@
+/* dnsmasq is Copyright (c) 2000-2024 Simon Kelley
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 dated June, 1991, or
+   (at your option) version 3 dated 29 June, 2007.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef CHARAND_H_C3BD69196178
+#define CHARAND_H_C3BD69196178
+
+#if !defined(HAVE_GETENTROPY) && !defined(RANDFILE)
+# define RANDFILE "/dev/urandom"
+#endif
+
+#include <stdint.h>
+#include <stdbool.h>
+
+int charand_init(void);
+bool charand_isinit(void);
+void charand_rekey(void);
+void charand_atfork_child(void);
+
+uint16_t charand16(void);
+uint32_t charand32(void);
+uint64_t charand64(void);
+
+static inline uintptr_t charandptr(void)
+{
+  const unsigned szp = sizeof(void*);
+  _Static_assert(szp == 4 || szp == 8, "void* is neither 32-bit nor 64-bit");
+  return szp == 4 ? charand32() : charand64();
+}
+
+#endif // CHARAND_H_C3BD69196178
diff --git src/dnsmasq.h src/dnsmasq.h
index 65c458e9..6421280d 100644
--- src/dnsmasq.h
+++ src/dnsmasq.h
@@ -1450,11 +1450,14 @@ char *ds_digest_name(int digest);
 char *algo_digest_name(int algo);
 char *nsec3_digest_name(int digest);
 
+#include "charand.h"
+
 /* util.c */
 void rand_init(void);
-unsigned short rand16(void);
-u32 rand32(void);
-u64 rand64(void);
+static inline unsigned short rand16(void) { return charand16(); }
+static inline u32 rand32(void) { return charand32(); }
+static inline u64 rand64(void) { return charand64(); }
+static inline uintptr_t randptr(void) { return charandptr(); }
 unsigned int should_reseed(time_t last, time_t now);
 int rr_on_list(struct rrlist *list, unsigned short rr);
 int legal_hostname(char *name);
diff --git src/util.c src/util.c
index 1631328c..63a1bcfd 100644
--- src/util.c
+++ src/util.c
@@ -14,12 +14,8 @@
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
-/* The SURF random number generator was taken from djbdns-1.05, by 
-   Daniel J Bernstein, which is public domain. */
-
 
 #include "dnsmasq.h"
-#include <stdbool.h>
 
 #ifdef HAVE_BROKEN_RTC
 #include <sys/times.h>
@@ -35,111 +31,14 @@
 #include <sys/utsname.h>
 #endif
 
-#ifndef HAVE_GETENTROPY
-// Non-standard libc getentropy() might use getrandom() avoiding filesystem 
access, that's great
-// for jails and chroots.  However, a fallback implemetation is required for 
older systems that have
-// no getentropy() in libc.  Also, getentropy() might block if the kernel has 
not initialized random
-// pool yet.  However, dnsmasq is never started that early during the OpenWRT 
boot process (at least).
-#define getentropy(a, b) getentropy_fallback(a, b)
-static int getentropy_fallback(void *buffer, size_t length)
-{
-  const int fd = open(RANDFILE, O_RDONLY);
-  if (fd == -1)
-    return -1;
-  const int okay = read_write(fd, buffer, length, 1);
-  close(fd);
-  return okay ? 0 : -1;
-}
-#endif // HAVE_GETENTROPY
-
-/* SURF random number generator */
-
-static struct surf_state {
-  u32 seed[32];
-  u32 in[12];
-} surfst;
-static u32 out[8];
-static int outleft = 0;
-
 void rand_init()
 {
-  const u32 * const in = surfst.in;
-  const bool reseed = in[0] || in[1] || in[2] || in[3] || in[4] || in[5] ||
-                      in[6] || in[7] || in[8] || in[9] || in[10] || in[11];
-  struct surf_state next;
-  const unsigned bytes = reseed ? sizeof(next.seed) : sizeof(next);
-  if (getentropy(&next, bytes) != 0)
-    {
-      if (!reseed)
-       die(_("failed to seed the random number generator: %s"), NULL, EC_MISC);
-      else
-       my_syslog(LOG_ERR, _("failed to reseed the random number generator: 
%s"), strerror(errno));
-    }
-  _Static_assert(surfst.in == surfst.seed + countof(surfst.seed)); // No weird 
alignment gaps.
-  for (unsigned int i = 0; i < bytes / sizeof(next.seed[0]); i++)
-    surfst.seed[i] ^= next.seed[i];
-}
-
-static void rand_atfork(pid_t fpid)
-{
-  struct surf_state next;
-  for (unsigned int i = 0; i < countof(next.seed); i++)
-    next.seed[i] = rand32();
-  // Child reseeds the state with generated values.  Parent skips those values 
explicitly as they
-  // might be exposed to an external observer and used to guess something 
about PRNG of the child.
-  if (fpid == 0)
-    for (unsigned int i = 0; i < countof(next.seed); i++)
-      surfst.seed[i] ^= next.seed[i];
-}
-
-#define ROTATE(x,b) (((x) << (b)) | ((x) >> (32 - (b))))
-#define MUSH(i,b) x = t[i] += (((x ^ seed[i]) + sum) ^ ROTATE(x,b));
-
-static void surf(void)
-{
-  u32 * const in = surfst.in;
-  const u32 * const seed = surfst.seed;
-  u32 t[12]; u32 x; u32 sum = 0;
-  int r; int i; int loop;
-
-  // Overflowing 32-bit counter is trivial, overflowing 64-bit counter takes 
146 years
-  // at 4 GHz rate. So let counter be 64-bit as 128-bit counter is excessive.
-  in[0]++; in[1] += !in[0];
-
-  for (i = 0;i < 12;++i) t[i] = in[i] ^ seed[12 + i];
-  for (i = 0;i < 8;++i) out[i] = seed[24 + i];
-  x = t[11];
-  for (loop = 0;loop < 2;++loop) {
-    for (r = 0;r < 16;++r) {
-      sum += 0x9e3779b9;
-      MUSH(0,5) MUSH(1,7) MUSH(2,9) MUSH(3,13)
-      MUSH(4,5) MUSH(5,7) MUSH(6,9) MUSH(7,13)
-      MUSH(8,5) MUSH(9,7) MUSH(10,9) MUSH(11,13)
-    }
-    for (i = 0;i < 8;++i) out[i] ^= t[i + 4];
-  }
-
-  outleft = 8;
-}
-
-unsigned short rand16(void)
-{
-  return (unsigned short)rand32();
-}
-
-u32 rand32(void)
-{
-  if (!outleft)
-    surf();
-  return out[--outleft];
-}
-
-u64 rand64(void)
-{
-  if (outleft < 2)
-    surf();
-  outleft -= 2;
-  return (u64)out[outleft+1] + (((u64)out[outleft]) << 32);
+  if (charand_init() == 0)
+    return;
+  else if (charand_isinit())
+    my_syslog(LOG_ERR, _("failed to reseed the random number generator: %s"), 
strerror(errno));
+  else
+    die(_("failed to seed the random number generator: %s"), NULL, EC_MISC);
 }
 
 // Reseeding hourly to avoid low-entropy condition right after boot that is 
somewhat possible
@@ -379,8 +278,8 @@ void safe_pipe(int *fd, int read_noblock)
 pid_t my_fork()
 {
   const pid_t fpid = fork();
-  if (fpid != -1)
-    rand_atfork(fpid);
+  if (fpid == 0)
+    charand_atfork_child();
   return fpid;
 }
 
-- 
2.34.1


_______________________________________________
Dnsmasq-discuss mailing list
Dnsmasq-discuss@lists.thekelleys.org.uk
https://lists.thekelleys.org.uk/cgi-bin/mailman/listinfo/dnsmasq-discuss

Reply via email to