https://gcc.gnu.org/g:ca0ea3d4192313ad00da4ff734baffcecafe0b1f
commit r14-11399-gca0ea3d4192313ad00da4ff734baffcecafe0b1f Author: Iain Buclaw <ibuc...@gdcproject.org> Date: Mon Mar 10 20:52:49 2025 +0100 libphobos: Default to libc closefrom in spawnProcessPosix [PR119112] This is a backport of two changes in upstream Phobos. - The current implementation of spawnProcessPosix is broken on systems with a large `ulimit -n' because it always OOMs, making it impossible to spawn processes. Using the libc implementation, when available, for doing file descriptor operations en masse solves this problem. - The fallback code now reads the list of file descriptors in use from /dev/fd or /proc/self/fd, avoiding the need to iterate over the entire list of possible file descriptors. This fixes the fork process being very slow and memory-intensive when RLIMIT_NOFILE is too high. PR d/119112 libphobos/ChangeLog: * libdruntime/core/sys/freebsd/unistd.d (closefrom): Add binding. * libdruntime/core/sys/linux/unistd.d (closefrom): Likewise. * libdruntime/core/sys/openbsd/unistd.d (closefrom): Likewise. * src/std/process.d (enum InternalError): Add closefds_dup2. (spawnProcessPosix): Use closefrom when available. 
Reviewed-on: https://github.com/dlang/phobos/pull/9048 Reviewed-on: https://github.com/dlang/phobos/pull/9077 Diff: --- libphobos/libdruntime/core/sys/freebsd/unistd.d | 2 + libphobos/libdruntime/core/sys/linux/unistd.d | 4 + libphobos/libdruntime/core/sys/openbsd/unistd.d | 2 + libphobos/src/std/process.d | 194 ++++++++++++++++++------ 4 files changed, 158 insertions(+), 44 deletions(-) diff --git a/libphobos/libdruntime/core/sys/freebsd/unistd.d b/libphobos/libdruntime/core/sys/freebsd/unistd.d index 493cda1c8c9f..ebb7afa2726c 100644 --- a/libphobos/libdruntime/core/sys/freebsd/unistd.d +++ b/libphobos/libdruntime/core/sys/freebsd/unistd.d @@ -17,3 +17,5 @@ extern(C): nothrow: int getosreldate() pure @trusted; + +void closefrom(int); diff --git a/libphobos/libdruntime/core/sys/linux/unistd.d b/libphobos/libdruntime/core/sys/linux/unistd.d index faa226cf407c..548870268dbf 100644 --- a/libphobos/libdruntime/core/sys/linux/unistd.d +++ b/libphobos/libdruntime/core/sys/linux/unistd.d @@ -5,6 +5,7 @@ public import core.sys.posix.unistd; version (linux): extern(C): nothrow: +@nogc: // Additional seek constants for sparse file handling // from Linux's unistd.h, stdio.h, and linux/fs.h @@ -21,3 +22,6 @@ char* getpass(const(char)* prompt); // Exit all threads in a process void exit_group(int status); + +/// Close all open file descriptors greater or equal to `lowfd` +void closefrom(int lowfd); diff --git a/libphobos/libdruntime/core/sys/openbsd/unistd.d b/libphobos/libdruntime/core/sys/openbsd/unistd.d index 4232c0360497..9618543d8da8 100644 --- a/libphobos/libdruntime/core/sys/openbsd/unistd.d +++ b/libphobos/libdruntime/core/sys/openbsd/unistd.d @@ -19,3 +19,5 @@ int getthrname(pid_t, char*, size_t); int pledge(const scope char*, const scope char*); int setthrname(pid_t, const scope char*); int unveil(const scope char*, const scope char*); + +int closefrom(int); diff --git a/libphobos/src/std/process.d b/libphobos/src/std/process.d index 494910f35350..3b8922179178 
100644 --- a/libphobos/src/std/process.d +++ b/libphobos/src/std/process.d @@ -872,6 +872,7 @@ version (Posix) private enum InternalError : ubyte doubleFork, malloc, preExec, + closefds_dup2, } /* @@ -1000,7 +1001,7 @@ private Pid spawnProcessPosix(scope const(char[])[] args, if (config.flags & Config.Flags.detached) close(pidPipe[0]); close(forkPipe[0]); - immutable forkPipeOut = forkPipe[1]; + auto forkPipeOut = forkPipe[1]; immutable pidPipeOut = pidPipe[1]; // Set the working directory. @@ -1034,56 +1035,157 @@ private Pid spawnProcessPosix(scope const(char[])[] args, if (!(config.flags & Config.Flags.inheritFDs)) { - // NOTE: malloc() and getrlimit() are not on the POSIX async - // signal safe functions list, but practically this should - // not be a problem. Java VM and CPython also use malloc() - // in its own implementation via opendir(). - import core.stdc.stdlib : malloc; - import core.sys.posix.poll : pollfd, poll, POLLNVAL; - import core.sys.posix.sys.resource : rlimit, getrlimit, RLIMIT_NOFILE; - - // Get the maximum number of file descriptors that could be open. - rlimit r; - if (getrlimit(RLIMIT_NOFILE, &r) != 0) - { - abortOnError(forkPipeOut, InternalError.getrlimit, .errno); - } - immutable maxDescriptors = cast(int) r.rlim_cur; + version (FreeBSD) + import core.sys.freebsd.unistd : closefrom; + else version (OpenBSD) + import core.sys.openbsd.unistd : closefrom; - // The above, less stdin, stdout, and stderr - immutable maxToClose = maxDescriptors - 3; - - // Call poll() to see which ones are actually open: - auto pfds = cast(pollfd*) malloc(pollfd.sizeof * maxToClose); - if (pfds is null) - { - abortOnError(forkPipeOut, InternalError.malloc, .errno); - } - foreach (i; 0 .. maxToClose) - { - pfds[i].fd = i + 3; - pfds[i].events = 0; - pfds[i].revents = 0; - } - if (poll(pfds, maxToClose, 0) >= 0) + static if (!__traits(compiles, closefrom)) { - foreach (i; 0 .. 
maxToClose) + void fallback (int lowfd) { - // don't close pipe write end - if (pfds[i].fd == forkPipeOut) continue; - // POLLNVAL will be set if the file descriptor is invalid. - if (!(pfds[i].revents & POLLNVAL)) close(pfds[i].fd); + import core.sys.posix.dirent : dirent, opendir, readdir, closedir, DIR; + import core.sys.posix.unistd : close; + import core.sys.posix.stdlib : atoi, malloc, free; + import core.sys.posix.sys.resource : rlimit, getrlimit, RLIMIT_NOFILE; + + // Get the maximum number of file descriptors that could be open. + rlimit r; + if (getrlimit(RLIMIT_NOFILE, &r) != 0) + abortOnError(forkPipeOut, InternalError.getrlimit, .errno); + + immutable maxDescriptors = cast(int) r.rlim_cur; + + // Missing druntime declaration + pragma(mangle, "dirfd") + extern(C) nothrow @nogc int dirfd(DIR* dir); + + DIR* dir = null; + + // We read from /dev/fd or /proc/self/fd only if the limit is high enough + if (maxDescriptors > 128*1024) + { + // Try to open the directory /dev/fd or /proc/self/fd + dir = opendir("/dev/fd"); + if (dir is null) dir = opendir("/proc/self/fd"); + } + + // If we have a directory, close all file descriptors except stdin, stdout, and stderr + if (dir) + { + scope(exit) closedir(dir); + + int opendirfd = dirfd(dir); + + // Iterate over all file descriptors + while (true) + { + dirent* entry = readdir(dir); + + if (entry is null) break; + if (entry.d_name[0] == '.') continue; + + int fd = atoi(cast(char*) entry.d_name); + + // Don't close stdin, stdout, stderr, or the directory file descriptor + if (fd < lowfd || fd == opendirfd) continue; + + close(fd); + } + } + else + { + // This is going to allocate 8 bytes for each possible file descriptor from lowfd to r.rlim_cur + if (maxDescriptors <= 128*1024) + { + // NOTE: malloc() and getrlimit() are not on the POSIX async + // signal safe functions list, but practically this should + // not be a problem. Java VM and CPython also use malloc() + // in its own implementation via opendir(). 
+ import core.stdc.stdlib : malloc; + import core.sys.posix.poll : pollfd, poll, POLLNVAL; + + immutable maxToClose = maxDescriptors - lowfd; + + // Call poll() to see which ones are actually open: + auto pfds = cast(pollfd*) malloc(pollfd.sizeof * maxToClose); + if (pfds is null) + { + abortOnError(forkPipeOut, InternalError.malloc, .errno); + } + + foreach (i; 0 .. maxToClose) + { + pfds[i].fd = i + lowfd; + pfds[i].events = 0; + pfds[i].revents = 0; + } + + if (poll(pfds, maxToClose, 0) < 0) + // couldn't use poll, use the slow path. + goto LslowClose; + + foreach (i; 0 .. maxToClose) + { + // POLLNVAL will be set if the file descriptor is invalid. + if (!(pfds[i].revents & POLLNVAL)) close(pfds[i].fd); + } + } + else + { + LslowClose: + // Fall back to closing everything. + foreach (i; lowfd .. maxDescriptors) + { + close(i); + } + } + } } - } - else - { - // Fall back to closing everything. - foreach (i; 3 .. maxDescriptors) + + // closefrom may not be available on the version of glibc we build against. + // Until we find a way to perform this check we will try to use dlsym to + // check for the function. See: https://github.com/dlang/phobos/pull/9048 + version (CRuntime_Glibc) { - if (i == forkPipeOut) continue; - close(i); + void closefrom (int lowfd) { + static bool tryGlibcClosefrom (int lowfd) { + import core.sys.posix.dlfcn : dlopen, dlclose, dlsym, dlerror, RTLD_LAZY; + + void *handle = dlopen("libc.so.6", RTLD_LAZY); + if (!handle) + return false; + scope(exit) dlclose(handle); + + // Clear errors + dlerror(); + alias closefromT = extern(C) void function(int) @nogc @system nothrow; + auto closefrom = cast(closefromT) dlsym(handle, "closefrom"); + if (dlerror()) + return false; + + closefrom(lowfd); + return true; + } + + if (!tryGlibcClosefrom(lowfd)) + fallback(lowfd); + } } + else + alias closefrom = fallback; } + + // We need to close all open file descriptors excluding std{in,out,err} + // and forkPipeOut because we still need it. 
+ // Since the various libc's provide us with `closefrom` move forkPipeOut + // to position 3, right after STDERR_FILENO, and close all FDs following that. + if (dup2(forkPipeOut, 3) == -1) + abortOnError(forkPipeOut, InternalError.closefds_dup2, .errno); + forkPipeOut = 3; + // forkPipeOut needs to be closed after we call `exec`. + setCLOEXEC(forkPipeOut, true); + closefrom(forkPipeOut + 1); } else // This is already done if we don't inherit descriptors. { @@ -1189,6 +1291,10 @@ private Pid spawnProcessPosix(scope const(char[])[] args, case InternalError.preExec: errorMsg = "Failed to execute preExecFunction"; break; + case InternalError.closefds_dup2: + assert(!(config.flags & Config.Flags.inheritFDs)); + errorMsg = "Failed to close inherited file descriptors"; + break; case InternalError.noerror: assert(false); }