Some of you might have seen a discussion on source-changes-d about pthread_atfork() (and arc4random() though that is not relevant here).
pthread_atfork is called in constructors for libraries, incl libc, so isn't allowed to use malloc(), which might not have been initialised yet. It used to use malloc() which broke some systems. Christos fixed that a week ago, but only probably - that is, the current code allows for 16 pthread_atfork callback functions to be registered (each call can register 1, 2 or 3 of them), and then reverts to malloc after that. That's probably enough, but only probably (it really depends upon how many calls to pthread_atfork() happen to be made before malloc() is ready to be used). An earlier attempt to fix this used mmap(), but not very intelligently (the problems that version caused were just a simple error in the mmap() call, not anything that was difficult to fix) - it used one mmap'd page for every callback function registered, which was kind of overkill. The version proposed here goes back to using mmap() but fits as many callback functions in each mapped page as it can. It retains the statically allocated block of 16 data structs, but only as pacifier for some absurd code which believes that setting rlimit(RLIM_AS) to 0 is a sane thing to do (and which therefore causes mmap() to fail). That (failing) is allowed of pthread_atfork() but not generally desireable. Eliminating the static allocation is trivial if desired. Below (not attached, just included) you will find a complete cvs diff of the proposed change. Because that is somewhat hard to read (diff kind of finds blank lines and similar which it detects as unchanged, and otherwise makes something of a mess of this particular change) I am also including the guts of the new algorithm, the part with the significant changes (but that alone is not enough, global decls, include files, etc, are not included in that, just the two functions (which replace 3) which have the executable code changes. The rest should be easy enough to spot in the diff. If anyone has any comments or suggestions, please make them soon, before this code gets comitted (it builds OK, etc., I have been using it in my builds for a week now). kre First the code as it appears, the diff follows. /* * Nb: nothing allocated by this allocator is ever freed. * (there is no API to free anything, and no need for one) * * The code relies upon this. */ static struct atfork_callback * af_alloc(unsigned int blocks) { struct atfork_callback *result; if (__predict_false(blocks == 0)) return NULL; if (__predict_true(atfork_storage == NULL)) { for (size_t i = 0; i < __arraycount(atfork_builtin); i++) { if (atfork_builtin[i].fn == NULL) { if (i + blocks <= __arraycount(atfork_builtin)) return &atfork_builtin[i]; else break; } } } if (__predict_false(atfork_storage == NULL || cb_used(atfork_storage) + blocks > cb_ents(atfork_storage))) { if (__predict_false(hw_pagesize == 0)) { size_t len = sizeof(hw_pagesize); if (sysctl(hw_pagesize_sysctl, 2, &hw_pagesize, &len, NULL, 0) != 0) return NULL; if (len != sizeof(hw_pagesize)) return NULL; if (hw_pagesize == 0 || (hw_pagesize & 0xFF) != 0) return NULL; } atfork_storage = mmap(0, hw_pagesize, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (__predict_false(atfork_storage == NULL)) return NULL; cb_used(atfork_storage) = 1; cb_ents(atfork_storage) = (uint16_t)(hw_pagesize / sizeof(struct atfork_cb_block)); if (__predict_false(cb_ents(atfork_storage) < blocks + 1)) return NULL; } result = cb_blocks(atfork_storage) + cb_used(atfork_storage); cb_used(atfork_storage) += blocks; return result; } int pthread_atfork(void (*prepare)(void), void (*parent)(void), void (*child)(void)) { struct atfork_callback *newprepare, *newparent, *newchild; sigset_t mask, omask; int error; sigfillset(&mask); thr_sigsetmask(SIG_SETMASK, &mask, &omask); mutex_lock(&atfork_lock); /* * Note here that we either get all the blocks * we need, in one call, or we get NULL. * * Note also that a NULL return is not an error * if no blocks were required (all args == NULL) */ newprepare = af_alloc((prepare != NULL) + (parent != NULL) + (child != NULL)); error = ENOMEM; /* in case of "goto out" */ newparent = newprepare; if (prepare != NULL) { if (__predict_false(newprepare == NULL)) goto out; newprepare->fn = prepare; newparent++; } newchild = newparent; if (parent != NULL) { if (__predict_false(newparent == NULL)) goto out; newparent->fn = parent; newchild++; } if (child != NULL) { if (__predict_false(newchild == NULL)) goto out; newchild->fn = child; } /* * The order in which the functions are called is specified as * LIFO for the prepare handler and FIFO for the others; insert * at the head and tail as appropriate so that SIMPLEQ_FOREACH() * produces the right order. */ if (prepare) SIMPLEQ_INSERT_HEAD(&prepareq, newprepare, next); if (parent) SIMPLEQ_INSERT_TAIL(&parentq, newparent, next); if (child) SIMPLEQ_INSERT_TAIL(&childq, newchild, next); error = 0; out:; mutex_unlock(&atfork_lock); thr_sigsetmask(SIG_SETMASK, &omask, NULL); return error; } Index: pthread_atfork.c =================================================================== RCS file: /cvsroot/src/lib/libc/gen/pthread_atfork.c,v retrieving revision 1.26 diff -u -r1.26 pthread_atfork.c --- pthread_atfork.c 4 Mar 2025 16:40:46 -0000 1.26 +++ pthread_atfork.c 10 Mar 2025 05:17:59 -0000 @@ -39,7 +39,12 @@ #include <errno.h> #include <stdlib.h> #include <unistd.h> + +#include <sys/mman.h> +#include <sys/param.h> #include <sys/queue.h> +#include <sys/sysctl.h> + #include "extern.h" #include "reentrant.h" @@ -59,6 +64,21 @@ void (*fn)(void); }; +struct atfork_cb_header { + uint16_t entries; + uint16_t used; +}; + +struct atfork_cb_block { + union { + struct atfork_callback block; + struct atfork_cb_header hdr; + } u; +}; + +#define cb_blocks(bp) (&(bp)->u.block) +#define cb_ents(bp) (bp)->u.hdr.entries +#define cb_used(bp) (bp)->u.hdr.used /* * We need to keep a cache for of at least 6, one for prepare, one for parent, @@ -70,6 +90,10 @@ * space for 16 since it is just 256 bytes. */ static struct atfork_callback atfork_builtin[16]; +static struct atfork_cb_block *atfork_storage = NULL; +static int hw_pagesize = 0; + +static const int hw_pagesize_sysctl[2] = { CTL_HW, HW_PAGESIZE }; /* * Hypothetically, we could protect the queues with a rwlock which is @@ -86,27 +110,59 @@ static struct atfork_callback_q parentq = SIMPLEQ_HEAD_INITIALIZER(parentq); static struct atfork_callback_q childq = SIMPLEQ_HEAD_INITIALIZER(childq); +/* + * Nb: nothing allocated by this allocator is ever freed. + * (there is no API to free anything, and no need for one) + * + * The code relies upon this. + */ static struct atfork_callback * -af_alloc(void) +af_alloc(unsigned int blocks) { + struct atfork_callback *result; + + if (__predict_false(blocks == 0)) + return NULL; - for (size_t i = 0; i < __arraycount(atfork_builtin); i++) { - if (atfork_builtin[i].fn == NULL) - return &atfork_builtin[i]; + if (__predict_true(atfork_storage == NULL)) { + for (size_t i = 0; i < __arraycount(atfork_builtin); i++) { + if (atfork_builtin[i].fn == NULL) { + if (i + blocks <= __arraycount(atfork_builtin)) + return &atfork_builtin[i]; + else + break; + } + } } - return malloc(sizeof(atfork_builtin)); -} + if (__predict_false(atfork_storage == NULL || + cb_used(atfork_storage) + blocks > cb_ents(atfork_storage))) { + if (__predict_false(hw_pagesize == 0)) { + size_t len = sizeof(hw_pagesize); + + if (sysctl(hw_pagesize_sysctl, 2, &hw_pagesize, + &len, NULL, 0) != 0) + return NULL; + if (len != sizeof(hw_pagesize)) + return NULL; + if (hw_pagesize == 0 || (hw_pagesize & 0xFF) != 0) + return NULL; + } + atfork_storage = mmap(0, hw_pagesize, PROT_READ|PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (__predict_false(atfork_storage == NULL)) + return NULL; + cb_used(atfork_storage) = 1; + cb_ents(atfork_storage) = + (uint16_t)(hw_pagesize / sizeof(struct atfork_cb_block)); + if (__predict_false(cb_ents(atfork_storage) < blocks + 1)) + return NULL; + } -static void -af_free(struct atfork_callback *af) -{ + result = cb_blocks(atfork_storage) + cb_used(atfork_storage); + cb_used(atfork_storage) += blocks; - if (af >= atfork_builtin - && af < atfork_builtin + __arraycount(atfork_builtin)) - af->fn = NULL; - else - free(af); + return result; } int @@ -117,42 +173,42 @@ sigset_t mask, omask; int error; - newprepare = newparent = newchild = NULL; - sigfillset(&mask); thr_sigsetmask(SIG_SETMASK, &mask, &omask); mutex_lock(&atfork_lock); + + /* + * Note here that we either get all the blocks + * we need, in one call, or we get NULL. + * + * Note also that a NULL return is not an error + * if no blocks were required (all args == NULL) + */ + newprepare = af_alloc((prepare != NULL) + + (parent != NULL) + (child != NULL)); + + error = ENOMEM; /* in case of "goto out" */ + + newparent = newprepare; if (prepare != NULL) { - newprepare = af_alloc(); - if (newprepare == NULL) { - error = ENOMEM; + if (__predict_false(newprepare == NULL)) goto out; - } newprepare->fn = prepare; + newparent++; } + newchild = newparent; if (parent != NULL) { - newparent = af_alloc(); - if (newparent == NULL) { - if (newprepare != NULL) - af_free(newprepare); - error = ENOMEM; + if (__predict_false(newparent == NULL)) goto out; - } newparent->fn = parent; + newchild++; } if (child != NULL) { - newchild = af_alloc(); - if (newchild == NULL) { - if (newprepare != NULL) - af_free(newprepare); - if (newparent != NULL) - af_free(newparent); - error = ENOMEM; + if (__predict_false(newchild == NULL)) goto out; - } newchild->fn = child; } @@ -168,9 +224,11 @@ SIMPLEQ_INSERT_TAIL(&parentq, newparent, next); if (child) SIMPLEQ_INSERT_TAIL(&childq, newchild, next); + error = 0; -out: mutex_unlock(&atfork_lock); + out:; + mutex_unlock(&atfork_lock); thr_sigsetmask(SIG_SETMASK, &omask, NULL); return error; }