surf() takes 18256 cycles to generate u32[8*2] and needs ~290 opcodes. chacha8() either takes 4086 cycles to generate same amount of entropy with just ~100 opcodes, or 1944 cycles with 320 opcodes depending on QUARTERROUND() inlining.
`gcc-13.2.0 -Os -mips32r2 -mtune=24kc -mips16` gives following numbers for MediaTek MT7620N running at 580MHz: rand16: 1.85 -> 0.294 us/call, 6.3x speedup rand32: 1.85 -> 0.520 us/call, 3.5x speedup rand64: 3.60 -> 0.959 us/call, 3.7x speedup --- Makefile | 1 + src/charand.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/charand.h | 42 +++++++++++ src/dnsmasq.h | 9 ++- src/util.c | 117 +++--------------------------- 5 files changed, 250 insertions(+), 112 deletions(-) create mode 100644 src/charand.c create mode 100644 src/charand.h diff --git Makefile Makefile index a9bcab90..b78f03ae 100644 --- Makefile +++ Makefile @@ -85,6 +85,7 @@ objs = cache.o rfc1035.o util.o option.o forward.o network.o \ dhcp-common.o outpacket.o radv.o slaac.o auth.o ipset.o pattern.o \ domain.o dnssec.o blockdata.o tables.o loop.o inotify.o \ poll.o rrfilter.o edns0.o arp.o crypto.o dump.o ubus.o \ + charand.o \ metrics.o hash-questions.o domain-match.o nftset.o hdrs = dnsmasq.h config.h dhcp-protocol.h dhcp6-protocol.h \ diff --git src/charand.c src/charand.c new file mode 100644 index 00000000..a2eed576 --- /dev/null +++ src/charand.c @@ -0,0 +1,193 @@ +/* dnsmasq is Copyright (c) 2000-2024 Simon Kelley + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 dated June, 1991, or + (at your option) version 3 dated 29 June, 2007. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +#include "config.h" +#include "charand.h" + +#include <sys/types.h> +#include <unistd.h> +#include <limits.h> +#include <string.h> +#include <time.h> +#include <sys/auxv.h> +#if !defined(HAVE_GETENTROPY) && defined(RANDFILE) +# include <stdio.h> +#endif + +// ChaCha8-based random number generator. That's NOT Golang's chacha8rand +// described at https://github.com/C2SP/C2SP/blob/main/chacha8rand.md +// as the most popular OpenWRT boxes are not that rich to have SIMD :-) + +// Non-standard libc getentropy() might use getrandom() avoiding filesystem access, that's great +// for jails and chroots. However, a fallback implemetation is required for older systems that have +// no getentropy() in libc. Also, getentropy() might block if the kernel has not initialized random +// pool yet. However, dnsmasq is never started that early during the OpenWRT boot process (at least). + +#if !defined(HAVE_GETENTROPY) && defined(RANDFILE) + +static int getentropy_fallback(void *buffer, size_t length) +{ + FILE *fd = fopen(RANDFILE, "r") + if (!fd) + return -1; + const size_t done = fread(fd, buffer, 1, length); + fclose(fd); + return done == length ? 0 : -1; +} + +#define getentropy(a, b) getentropy_fallback(a, b) + +#endif // !defined(HAVE_GETENTROPY) + +static void chacha8(void); + +static uint32_t CHA[16] = { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 }; +static uint32_t CHAO[16]; +static unsigned int CHAOBYEs = 0; + +static void nonce_time(void) { CHA[14] = (uint32_t)time(NULL); }; +static void nonce_proc(void) { CHA[15] = getpid(); }; + +int charand_init(void) +{ + if (getentropy(CHA + 4, 8 * sizeof(uint32_t))) + return -1; + // Overflowing 32-bit counter is trivial, overflowing 64-bit counter takes 146 years at 4 GHz rate. + // So, counter is never reset during the process lifetime. + CHA[12] = CHA[13] = 0; + nonce_time(); + nonce_proc(); + chacha8(); + return 0; +} + +bool charand_isinit(void) +{ + return CHA[15] != 0; +} + +void charand_rekey(void) +{ + if (CHAOBYEs < 32) + chacha8(); + for (int i = 0; i < 8; i++) + CHA[4+i] ^= CHAO[i]; + nonce_time(); + chacha8(); +} + +// This atfork handler avoids getentropy() call to preserve system entropy pool. +void charand_atfork_child(void) +{ + const uint32_t *r32x4 = +#ifdef AT_RANDOM + (const uint32_t*)getauxval(AT_RANDOM); // Use the entropy OS provides. Hopefully, ptr is aligned :-) +#else +# warning No getauxval(AT_RANDOM) available + NULL; +#endif + if (r32x4) + for (unsigned int i = 0; i < 16 / sizeof(uint32_t); i++) + CHA[4+i] ^= r32x4[i]; // scramble 256-bit key with 128-bit process-wide random + nonce_proc(); + chacha8(); + charand_rekey(); +} + +static inline uint32_t rotl32(uint32_t x, unsigned b) { return (x << b) | (x >> (32 - b)); } + +#define QUARTERROUND(a, b, c, d) do { \ + CHAO[a] += CHAO[b]; CHAO[d] ^= CHAO[a]; CHAO[d] = rotl32(CHAO[d], 16); \ + CHAO[c] += CHAO[d]; CHAO[b] ^= CHAO[c]; CHAO[b] = rotl32(CHAO[b], 12); \ + CHAO[a] += CHAO[b]; CHAO[d] ^= CHAO[a]; CHAO[d] = rotl32(CHAO[d], 8); \ + CHAO[c] += CHAO[d]; CHAO[b] ^= CHAO[c]; CHAO[b] = rotl32(CHAO[b], 7); \ +} while (0) + +// These are two options for the ChaCha8 quarter-round implementation of different size and speed. +// There is also a obvious middleground: +// static void chaqround(int a, int b, int c, int d) { QUARTERROUND(a, b, c, d); } +// But it takes 3654 cycles and 130 opcodes, so it's kinda pointless. + +#ifdef __OPTIMIZE_SIZE__ + +// 4086 cycles, 100 opcodes for `gcc-13.2.0 -Os -mips32r2 -mtune=24kc -mips16` +# define quarterround(a, b, c, d) quarterround_1arg((a) | ((b) << 4) | ((c) << 8) | ((d) << 12)) + +static void quarterround_1arg(unsigned ndx) +{ + const unsigned a = 0x0F & ndx; + const unsigned b = 0x0F & (ndx >> 4); + const unsigned c = 0x0F & (ndx >> 8); + const unsigned d = ndx >> 12; + QUARTERROUND(a, b, c, d); +} + +#else + +// 1944 cycles, 320 opcodes for the same MIPS32r2 machine +# define quarterround(a, b, c, d) QUARTERROUND(a, b, c, d) + +#endif // __OPTIMIZE_SIZE__ + +static void chacha8(void) +{ + CHA[12]++; + CHA[13] += !CHA[12]; + + memcpy(CHAO, CHA, sizeof(CHAO)); + + for (unsigned i = 4; i; i--) { + quarterround(0, 4, 8, 12); + quarterround(1, 5, 9, 13); + quarterround(2, 6, 10, 14); + quarterround(3, 7, 11, 15); + quarterround(0, 5, 10, 15); + quarterround(1, 6, 11, 12); + quarterround(2, 7, 8, 13); + quarterround(3, 4, 9, 14); + } + + for (unsigned i = 0; i < 16; i++) + CHAO[i] += CHA[i]; + + CHAOBYEs = sizeof(CHAO); +} + +// rand16() is the most popular in rand*() functions family, it is called once per ≈ every DNS request, +// so it should not waste generated entropy. +// TODO: count real-world stats for an OpenWRT box. +uint16_t charand16(void) +{ + // CHAOBYEs is always aligned at uint16_t boundary + if (!CHAOBYEs) chacha8(); + CHAOBYEs -= sizeof(uint16_t); + return *(uint16_t*)(((uint8_t*)CHAO) + CHAOBYEs); +} + +uint32_t charand32(void) +{ + CHAOBYEs &= (UINT_MAX - 3); // skip some to make read aligned + if (!CHAOBYEs) chacha8(); + CHAOBYEs -= sizeof(uint32_t); + return *(uint32_t*)(((uint8_t*)CHAO) + CHAOBYEs); +} + +uint64_t charand64(void) +{ + CHAOBYEs &= (UINT_MAX - 7); // skip some to make read aligned + if (!CHAOBYEs) chacha8(); + CHAOBYEs -= sizeof(uint64_t); + return *(uint64_t*)(((uint8_t*)CHAO) + CHAOBYEs); +} diff --git src/charand.h src/charand.h new file mode 100644 index 00000000..a831426a --- /dev/null +++ src/charand.h @@ -0,0 +1,42 @@ +/* dnsmasq is Copyright (c) 2000-2024 Simon Kelley + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 dated June, 1991, or + (at your option) version 3 dated 29 June, 2007. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +#ifndef CHARAND_H_C3BD69196178 +#define CHARAND_H_C3BD69196178 + +#if !defined(HAVE_GETENTROPY) && !defined(RANDFILE) +# define RANDFILE "/dev/urandom" +#endif + +#include <stdint.h> +#include <stdbool.h> + +int charand_init(void); +bool charand_isinit(void); +void charand_rekey(void); +void charand_atfork_child(void); + +uint16_t charand16(void); +uint32_t charand32(void); +uint64_t charand64(void); + +static inline uintptr_t charandptr(void) +{ + const unsigned szp = sizeof(void*); + _Static_assert(szp == 4 || szp == 8, "void* is neither 32-bit nor 64-bit"); + return szp == 4 ? charand32() : charand64(); +} + +#endif // CHARAND_H_C3BD69196178 diff --git src/dnsmasq.h src/dnsmasq.h index 65c458e9..6421280d 100644 --- src/dnsmasq.h +++ src/dnsmasq.h @@ -1450,11 +1450,14 @@ char *ds_digest_name(int digest); char *algo_digest_name(int algo); char *nsec3_digest_name(int digest); +#include "charand.h" + /* util.c */ void rand_init(void); -unsigned short rand16(void); -u32 rand32(void); -u64 rand64(void); +static inline unsigned short rand16(void) { return charand16(); } +static inline u32 rand32(void) { return charand32(); } +static inline u64 rand64(void) { return charand64(); } +static inline uintptr_t randptr(void) { return charandptr(); } unsigned int should_reseed(time_t last, time_t now); int rr_on_list(struct rrlist *list, unsigned short rr); int legal_hostname(char *name); diff --git src/util.c src/util.c index 1631328c..63a1bcfd 100644 --- src/util.c +++ src/util.c @@ -14,12 +14,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -/* The SURF random number generator was taken from djbdns-1.05, by - Daniel J Bernstein, which is public domain. */ - #include "dnsmasq.h" -#include <stdbool.h> #ifdef HAVE_BROKEN_RTC #include <sys/times.h> @@ -35,111 +31,14 @@ #include <sys/utsname.h> #endif -#ifndef HAVE_GETENTROPY -// Non-standard libc getentropy() might use getrandom() avoiding filesystem access, that's great -// for jails and chroots. However, a fallback implemetation is required for older systems that have -// no getentropy() in libc. Also, getentropy() might block if the kernel has not initialized random -// pool yet. However, dnsmasq is never started that early during the OpenWRT boot process (at least). -#define getentropy(a, b) getentropy_fallback(a, b) -static int getentropy_fallback(void *buffer, size_t length) -{ - const int fd = open(RANDFILE, O_RDONLY); - if (fd == -1) - return -1; - const int okay = read_write(fd, buffer, length, 1); - close(fd); - return okay ? 0 : -1; -} -#endif // HAVE_GETENTROPY - -/* SURF random number generator */ - -static struct surf_state { - u32 seed[32]; - u32 in[12]; -} surfst; -static u32 out[8]; -static int outleft = 0; - void rand_init() { - const u32 * const in = surfst.in; - const bool reseed = in[0] || in[1] || in[2] || in[3] || in[4] || in[5] || - in[6] || in[7] || in[8] || in[9] || in[10] || in[11]; - struct surf_state next; - const unsigned bytes = reseed ? sizeof(next.seed) : sizeof(next); - if (getentropy(&next, bytes) != 0) - { - if (!reseed) - die(_("failed to seed the random number generator: %s"), NULL, EC_MISC); - else - my_syslog(LOG_ERR, _("failed to reseed the random number generator: %s"), strerror(errno)); - } - _Static_assert(surfst.in == surfst.seed + countof(surfst.seed)); // No weird alignment gaps. - for (unsigned int i = 0; i < bytes / sizeof(next.seed[0]); i++) - surfst.seed[i] ^= next.seed[i]; -} - -static void rand_atfork(pid_t fpid) -{ - struct surf_state next; - for (unsigned int i = 0; i < countof(next.seed); i++) - next.seed[i] = rand32(); - // Child reseeds the state with generated values. Parent skips those values explicitly as they - // might be exposed to an external observer and used to guess something about PRNG of the child. - if (fpid == 0) - for (unsigned int i = 0; i < countof(next.seed); i++) - surfst.seed[i] ^= next.seed[i]; -} - -#define ROTATE(x,b) (((x) << (b)) | ((x) >> (32 - (b)))) -#define MUSH(i,b) x = t[i] += (((x ^ seed[i]) + sum) ^ ROTATE(x,b)); - -static void surf(void) -{ - u32 * const in = surfst.in; - const u32 * const seed = surfst.seed; - u32 t[12]; u32 x; u32 sum = 0; - int r; int i; int loop; - - // Overflowing 32-bit counter is trivial, overflowing 64-bit counter takes 146 years - // at 4 GHz rate. So let counter be 64-bit as 128-bit counter is excessive. - in[0]++; in[1] += !in[0]; - - for (i = 0;i < 12;++i) t[i] = in[i] ^ seed[12 + i]; - for (i = 0;i < 8;++i) out[i] = seed[24 + i]; - x = t[11]; - for (loop = 0;loop < 2;++loop) { - for (r = 0;r < 16;++r) { - sum += 0x9e3779b9; - MUSH(0,5) MUSH(1,7) MUSH(2,9) MUSH(3,13) - MUSH(4,5) MUSH(5,7) MUSH(6,9) MUSH(7,13) - MUSH(8,5) MUSH(9,7) MUSH(10,9) MUSH(11,13) - } - for (i = 0;i < 8;++i) out[i] ^= t[i + 4]; - } - - outleft = 8; -} - -unsigned short rand16(void) -{ - return (unsigned short)rand32(); -} - -u32 rand32(void) -{ - if (!outleft) - surf(); - return out[--outleft]; -} - -u64 rand64(void) -{ - if (outleft < 2) - surf(); - outleft -= 2; - return (u64)out[outleft+1] + (((u64)out[outleft]) << 32); + if (charand_init() == 0) + return; + else if (charand_isinit()) + my_syslog(LOG_ERR, _("failed to reseed the random number generator: %s"), strerror(errno)); + else + die(_("failed to seed the random number generator: %s"), NULL, EC_MISC); } // Reseeding hourly to avoid low-entropy condition right after boot that is somewhat possible @@ -379,8 +278,8 @@ void safe_pipe(int *fd, int read_noblock) pid_t my_fork() { const pid_t fpid = fork(); - if (fpid != -1) - rand_atfork(fpid); + if (fpid == 0) + charand_atfork_child(); return fpid; } -- 2.34.1 _______________________________________________ Dnsmasq-discuss mailing list Dnsmasq-discuss@lists.thekelleys.org.uk https://lists.thekelleys.org.uk/cgi-bin/mailman/listinfo/dnsmasq-discuss