The branch main has been updated by kevans: URL: https://cgit.FreeBSD.org/src/commit/?id=d5bc81e6c76ba6fcf4c7a813e3b9731a7889d64e
commit d5bc81e6c76ba6fcf4c7a813e3b9731a7889d64e Author: Kyle Evans <kev...@freebsd.org> AuthorDate: 2025-07-16 15:43:22 +0000 Commit: Kyle Evans <kev...@freebsd.org> CommitDate: 2025-07-26 21:31:41 +0000 kern: move the vnode user coredumper out into its own file This more cleanly contains the bits that are specifically relevant to dumping coredumps out to a vnode, which will make future changes a bit easier to review. This also makes the scope of the relevant sysctls easier to reason about in the process, as they're not visible outside of the vnode dumper file -- this will mostly become relevant when we allow pluggable dumpers. While we're here, move all of the coredump-related stuff out into its own kern_ucoredump.c and sys/ucoredump.h. We have enough that it's useful to separate it out and de-clutter kern_sig.c and sys/exec.h a bit. Reviewed by: kib, markj (both earlier version) Differential Revision: https://reviews.freebsd.org/D51349 --- share/man/man5/core.5 | 8 +- sys/conf/files | 2 + sys/kern/coredump_vnode.c | 553 +++++++++++++++++++++++++++++++++++++++++++ sys/kern/imgact_elf.c | 4 +- sys/kern/kern_exec.c | 31 +-- sys/kern/kern_sig.c | 591 +--------------------------------------------- sys/kern/kern_ucoredump.c | 212 +++++++++++++++++ sys/sys/exec.h | 48 ---- sys/sys/signalvar.h | 1 + sys/sys/ucoredump.h | 68 ++++++ 10 files changed, 851 insertions(+), 667 deletions(-) diff --git a/share/man/man5/core.5 b/share/man/man5/core.5 index 8efc8c970014..aa6e3c67097d 100644 --- a/share/man/man5/core.5 +++ b/share/man/man5/core.5 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd November 12, 2023 +.Dd July 17, 2025 .Dt CORE 5 .Os .Sh NAME @@ -116,11 +116,13 @@ variable .Va kern.sugid_coredump to 1. 
.Pp -Corefiles can be compressed by the kernel if the following item -is included in the kernel configuration file: +Corefiles can be compressed by the kernel if one of the following items +is included in the kernel configuration file: .Bl -tag -width "1234567890" -compact -offset "12345" .It options GZIO +.It options +ZSTDIO .El .Pp The following sysctl control core file compression: diff --git a/sys/conf/files b/sys/conf/files index dd0d390962f2..b7c19fae0b8e 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3768,6 +3768,7 @@ gnu/gcov/gcov_subr.c optional gcov kern/bus_if.m standard kern/clock_if.m standard +kern/coredump_vnode.c standard kern/cpufreq_if.m standard kern/device_if.m standard kern/imgact_binmisc.c optional imgact_binmisc @@ -3856,6 +3857,7 @@ kern/kern_time.c standard kern/kern_timeout.c standard kern/kern_tslog.c optional tslog kern/kern_ubsan.c optional kubsan +kern/kern_ucoredump.c standard kern/kern_umtx.c standard kern/kern_uuid.c standard kern/kern_vnodedumper.c standard diff --git a/sys/kern/coredump_vnode.c b/sys/kern/coredump_vnode.c new file mode 100644 index 000000000000..675503476a4e --- /dev/null +++ b/sys/kern/coredump_vnode.c @@ -0,0 +1,553 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause AND BSD-2-Clause + * + * Copyright (c) 1982, 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * - kern_sig.c + */ +/* + * Copyright (c) 1993, David Greenman + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * -kern_exec.c + */ + +#include <sys/systm.h> +#include <sys/acct.h> +#include <sys/compressor.h> +#include <sys/devctl.h> +#include <sys/fcntl.h> +#include <sys/jail.h> +#include <sys/limits.h> +#include <sys/namei.h> +#include <sys/proc.h> +#include <sys/sbuf.h> +#include <sys/stat.h> +#include <sys/sysctl.h> +#include <sys/sysent.h> +#include <sys/syslog.h> +#include <sys/ucoredump.h> +#include <sys/unistd.h> +#include <sys/vnode.h> + +#include <security/audit/audit.h> + +#define GZIP_SUFFIX ".gz" +#define ZSTD_SUFFIX ".zst" + +#define MAX_NUM_CORE_FILES 100000 +#ifndef NUM_CORE_FILES +#define NUM_CORE_FILES 5 +#endif + +_Static_assert(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES, + "NUM_CORE_FILES is out of range (0 to " __STRING(MAX_NUM_CORE_FILES) ")"); +static int num_cores = NUM_CORE_FILES; + +static int capmode_coredump; +SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN, + &capmode_coredump, 0, "Allow processes in capability mode to dump core"); + +static int set_core_nodump_flag = 0; +SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag, + 0, "Enable setting the NODUMP flag on coredump files"); + +static int coredump_devctl = 0; 
+SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl, + 0, "Generate a devctl notification when processes coredump"); + +/* + * corefilename[] is protected by the allproc_lock. + */ +static char corefilename[MAXPATHLEN] = { "%N.core" }; +TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename)); + +static int +sysctl_kern_corefile(SYSCTL_HANDLER_ARGS) +{ + int error; + + sx_xlock(&allproc_lock); + error = sysctl_handle_string(oidp, corefilename, sizeof(corefilename), + req); + sx_xunlock(&allproc_lock); + + return (error); +} +SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW | + CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A", + "Process corefile name format string"); + +static int +sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS) +{ + int error; + int new_val; + + new_val = num_cores; + error = sysctl_handle_int(oidp, &new_val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + if (new_val > MAX_NUM_CORE_FILES) + new_val = MAX_NUM_CORE_FILES; + if (new_val < 0) + new_val = 0; + num_cores = new_val; + return (0); +} +SYSCTL_PROC(_debug, OID_AUTO, ncores, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(int), + sysctl_debug_num_cores_check, "I", + "Maximum number of generated process corefiles while using index format"); + +static void +vnode_close_locked(struct thread *td, struct vnode *vp) +{ + + VOP_UNLOCK(vp); + vn_close(vp, FWRITE, td->td_ucred, td); +} + +int +core_vn_write(const struct coredump_writer *cdw, const void *base, size_t len, + off_t offset, enum uio_seg seg, struct ucred *cred, size_t *resid, + struct thread *td) +{ + struct coredump_vnode_ctx *ctx = cdw->ctx; + + return (vn_rdwr_inchunks(UIO_WRITE, ctx->vp, __DECONST(void *, base), + len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED, + cred, ctx->fcred, resid, td)); +} + +int +core_vn_extend(const struct coredump_writer *cdw, off_t newsz, + struct ucred *cred) +{ + struct coredump_vnode_ctx *ctx = cdw->ctx; + struct 
mount *mp; + int error; + + error = vn_start_write(ctx->vp, &mp, V_WAIT); + if (error != 0) + return (error); + vn_lock(ctx->vp, LK_EXCLUSIVE | LK_RETRY); + error = vn_truncate_locked(ctx->vp, newsz, false, cred); + VOP_UNLOCK(ctx->vp); + vn_finished_write(mp); + return (error); +} + +/* + * If the core format has a %I in it, then we need to check + * for existing corefiles before defining a name. + * To do this we iterate over 0..ncores to find a + * non-existing core file name to use. If all core files are + * already used we choose the oldest one. + */ +static int +corefile_open_last(struct thread *td, char *name, int indexpos, + int indexlen, int ncores, struct vnode **vpp) +{ + struct vnode *oldvp, *nextvp, *vp; + struct vattr vattr; + struct nameidata nd; + int error, i, flags, oflags, cmode; + char ch; + struct timespec lasttime; + + nextvp = oldvp = NULL; + cmode = S_IRUSR | S_IWUSR; + oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | + (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0); + + for (i = 0; i < ncores; i++) { + flags = O_CREAT | FWRITE | O_NOFOLLOW; + + ch = name[indexpos + indexlen]; + (void)snprintf(name + indexpos, indexlen + 1, "%.*u", indexlen, + i); + name[indexpos + indexlen] = ch; + + NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name); + error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, + NULL); + if (error != 0) + break; + + vp = nd.ni_vp; + NDFREE_PNBUF(&nd); + if ((flags & O_CREAT) == O_CREAT) { + nextvp = vp; + break; + } + + error = VOP_GETATTR(vp, &vattr, td->td_ucred); + if (error != 0) { + vnode_close_locked(td, vp); + break; + } + + if (oldvp == NULL || + lasttime.tv_sec > vattr.va_mtime.tv_sec || + (lasttime.tv_sec == vattr.va_mtime.tv_sec && + lasttime.tv_nsec >= vattr.va_mtime.tv_nsec)) { + if (oldvp != NULL) + vn_close(oldvp, FWRITE, td->td_ucred, td); + oldvp = vp; + VOP_UNLOCK(oldvp); + lasttime = vattr.va_mtime; + } else { + vnode_close_locked(td, vp); + } + } + + if (oldvp != NULL) { + if (nextvp == NULL) { + if 
((td->td_proc->p_flag & P_SUGID) != 0) { + error = EFAULT; + vn_close(oldvp, FWRITE, td->td_ucred, td); + } else { + nextvp = oldvp; + error = vn_lock(nextvp, LK_EXCLUSIVE); + if (error != 0) { + vn_close(nextvp, FWRITE, td->td_ucred, + td); + nextvp = NULL; + } + } + } else { + vn_close(oldvp, FWRITE, td->td_ucred, td); + } + } + if (error != 0) { + if (nextvp != NULL) + vnode_close_locked(td, oldvp); + } else { + *vpp = nextvp; + } + + return (error); +} + +/* + * corefile_open(comm, uid, pid, td, compress, signum, vpp, namep) + * Expand the name described in corefilename, using name, uid, and pid + * and open/create core file. + * corefilename is a printf-like string, with format specifiers including: + * %N name of process ("name") + * %P process id (pid) + * %U user id (uid) + * For example, "%N.core" is the default; they can be disabled completely + * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P". + * This is controlled by the sysctl variable kern.corefile (see above). 
+ */ +static int +corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td, + int compress, int signum, struct vnode **vpp, char **namep) +{ + struct sbuf sb; + struct nameidata nd; + const char *format; + char *hostname, *name; + int cmode, error, flags, i, indexpos, indexlen, oflags, ncores; + + hostname = NULL; + format = corefilename; + name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO); + indexlen = 0; + indexpos = -1; + ncores = num_cores; + (void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN); + sx_slock(&allproc_lock); + for (i = 0; format[i] != '\0'; i++) { + switch (format[i]) { + case '%': /* Format character */ + i++; + switch (format[i]) { + case '%': + sbuf_putc(&sb, '%'); + break; + case 'H': /* hostname */ + if (hostname == NULL) { + hostname = malloc(MAXHOSTNAMELEN, + M_TEMP, M_WAITOK); + } + getcredhostname(td->td_ucred, hostname, + MAXHOSTNAMELEN); + sbuf_cat(&sb, hostname); + break; + case 'I': /* autoincrementing index */ + if (indexpos != -1) { + sbuf_printf(&sb, "%%I"); + break; + } + + indexpos = sbuf_len(&sb); + sbuf_printf(&sb, "%u", ncores - 1); + indexlen = sbuf_len(&sb) - indexpos; + break; + case 'N': /* process name */ + sbuf_printf(&sb, "%s", comm); + break; + case 'P': /* process id */ + sbuf_printf(&sb, "%u", pid); + break; + case 'S': /* signal number */ + sbuf_printf(&sb, "%i", signum); + break; + case 'U': /* user id */ + sbuf_printf(&sb, "%u", uid); + break; + default: + log(LOG_ERR, + "Unknown format character %c in " + "corename `%s'\n", format[i], format); + break; + } + break; + default: + sbuf_putc(&sb, format[i]); + break; + } + } + sx_sunlock(&allproc_lock); + free(hostname, M_TEMP); + if (compress == COMPRESS_GZIP) + sbuf_cat(&sb, GZIP_SUFFIX); + else if (compress == COMPRESS_ZSTD) + sbuf_cat(&sb, ZSTD_SUFFIX); + if (sbuf_error(&sb) != 0) { + log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too " + "long\n", (long)pid, comm, (u_long)uid); + sbuf_delete(&sb); + free(name, M_TEMP); + return (ENOMEM); + 
} + sbuf_finish(&sb); + sbuf_delete(&sb); + + if (indexpos != -1) { + error = corefile_open_last(td, name, indexpos, indexlen, ncores, + vpp); + if (error != 0) { + log(LOG_ERR, + "pid %d (%s), uid (%u): Path `%s' failed " + "on initial open test, error = %d\n", + pid, comm, uid, name, error); + } + } else { + cmode = S_IRUSR | S_IWUSR; + oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | + (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0); + flags = O_CREAT | FWRITE | O_NOFOLLOW; + if ((td->td_proc->p_flag & P_SUGID) != 0) + flags |= O_EXCL; + + NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name); + error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, + NULL); + if (error == 0) { + *vpp = nd.ni_vp; + NDFREE_PNBUF(&nd); + } + } + + if (error != 0) { +#ifdef AUDIT + audit_proc_coredump(td, name, error); +#endif + free(name, M_TEMP); + return (error); + } + *namep = name; + return (0); +} + +/* + * The vnode dumper is the traditional coredump handler. Our policy and limits + * are generally checked already, so it creates the coredump name and passes on + * a vnode and a size limit to the process-specific coredump routine if there is + * one. If there _is not_ one, it returns ENOSYS; otherwise it returns the + * error from the process-specific routine. 
+ */ +int +coredump_vnode(struct thread *td, off_t limit) +{ + struct proc *p = td->td_proc; + struct ucred *cred = td->td_ucred; + struct vnode *vp; + struct coredump_vnode_ctx wctx; + struct coredump_writer cdw = { }; + struct flock lf; + struct vattr vattr; + size_t fullpathsize; + int error, error1, jid, locked, ppid, sig; + char *name; /* name of corefile */ + void *rl_cookie; + char *fullpath, *freepath = NULL; + struct sbuf *sb; + + PROC_LOCK_ASSERT(p, MA_OWNED); + + ppid = p->p_oppid; + sig = p->p_sig; + jid = p->p_ucred->cr_prison->pr_id; + PROC_UNLOCK(p); + + error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td, + compress_user_cores, sig, &vp, &name); + if (error != 0) + return (error); + + /* + * Don't dump to non-regular files or files with links. + * Do not dump into system files. Effective user must own the corefile. + */ + if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 || + vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0 || + vattr.va_uid != cred->cr_uid) { + VOP_UNLOCK(vp); + error = EFAULT; + goto out; + } + + VOP_UNLOCK(vp); + + /* Postpone other writers, including core dumps of other processes. 
*/ + rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); + + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + lf.l_type = F_WRLCK; + locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0); + + VATTR_NULL(&vattr); + vattr.va_size = 0; + if (set_core_nodump_flag) + vattr.va_flags = UF_NODUMP; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + VOP_SETATTR(vp, &vattr, cred); + VOP_UNLOCK(vp); + PROC_LOCK(p); + p->p_acflag |= ACORE; + PROC_UNLOCK(p); + + wctx.vp = vp; + wctx.fcred = NOCRED; + + cdw.ctx = &wctx; + cdw.write_fn = core_vn_write; + cdw.extend_fn = core_vn_extend; + + if (p->p_sysent->sv_coredump != NULL) { + error = p->p_sysent->sv_coredump(td, &cdw, limit, 0); + } else { + error = ENOSYS; + } + + if (locked) { + lf.l_type = F_UNLCK; + VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK); + } + vn_rangelock_unlock(vp, rl_cookie); + + /* + * Notify the userland helper that a process triggered a core dump. + * This allows the helper to run an automated debugging session. + */ + if (error != 0 || coredump_devctl == 0) + goto out; + sb = sbuf_new_auto(); + if (vn_fullpath_global(p->p_textvp, &fullpath, &freepath) != 0) + goto out2; + sbuf_cat(sb, "comm=\""); + devctl_safe_quote_sb(sb, fullpath); + free(freepath, M_TEMP); + sbuf_cat(sb, "\" core=\""); + + /* + * We can't look up core file vp directly. When we're replacing a core, and + * other random times, we flush the name cache, so it will fail. Instead, + * if the path of the core is relative, add the current dir in front of it. 
+ */ + if (name[0] != '/') { + fullpathsize = MAXPATHLEN; + freepath = malloc(fullpathsize, M_TEMP, M_WAITOK); + if (vn_getcwd(freepath, &fullpath, &fullpathsize) != 0) { + free(freepath, M_TEMP); + goto out2; + } + devctl_safe_quote_sb(sb, fullpath); + free(freepath, M_TEMP); + sbuf_putc(sb, '/'); + } + devctl_safe_quote_sb(sb, name); + sbuf_putc(sb, '"'); + + sbuf_printf(sb, " jid=%d pid=%d ppid=%d signo=%d", + jid, p->p_pid, ppid, sig); + if (sbuf_finish(sb) == 0) + devctl_notify("kernel", "signal", "coredump", sbuf_data(sb)); +out2: + sbuf_delete(sb); +out: + error1 = vn_close(vp, FWRITE, cred, td); + if (error == 0) + error = error1; +#ifdef AUDIT + audit_proc_coredump(td, name, error); +#endif + free(name, M_TEMP); + return (error); +} diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index a7d3e22e6279..2690ad3b2679 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -64,6 +64,7 @@ #include <sys/syscall.h> #include <sys/sysctl.h> #include <sys/sysent.h> +#include <sys/ucoredump.h> #include <sys/vnode.h> #include <sys/syslog.h> #include <sys/eventhandler.h> @@ -1562,9 +1563,6 @@ struct note_info { TAILQ_HEAD(note_info_list, note_info); -extern int compress_user_cores; -extern int compress_user_cores_level; - static void cb_put_phdr(vm_map_entry_t, void *); static void cb_size_segment(vm_map_entry_t, void *); static void each_dumpable_segment(struct thread *, segment_callback, void *, diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index a0a22ee8539b..5cd4d39d7236 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -70,6 +70,7 @@ #include <sys/sysent.h> #include <sys/sysproto.h> #include <sys/timers.h> +#include <sys/ucoredump.h> #include <sys/umtxvar.h> #include <sys/vnode.h> #include <sys/wait.h> @@ -1998,18 +1999,6 @@ compress_chunk(struct coredump_params *cp, char *base, char *buf, size_t len) return (error); } -int -core_vn_write(const struct coredump_writer *cdw, const void *base, size_t len, - off_t offset, 
enum uio_seg seg, struct ucred *cred, size_t *resid, - struct thread *td) -{ - struct coredump_vnode_ctx *ctx = cdw->ctx; - - return (vn_rdwr_inchunks(UIO_WRITE, ctx->vp, __DECONST(void *, base), - len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED, - cred, ctx->fcred, resid, td)); -} - int core_write(struct coredump_params *cp, const void *base, size_t len, off_t offset, enum uio_seg seg, size_t *resid) @@ -2018,24 +2007,6 @@ core_write(struct coredump_params *cp, const void *base, size_t len, cp->active_cred, resid, cp->td)); } -int -core_vn_extend(const struct coredump_writer *cdw, off_t newsz, - struct ucred *cred) -{ - struct coredump_vnode_ctx *ctx = cdw->ctx; - struct mount *mp; - int error; - - error = vn_start_write(ctx->vp, &mp, V_WAIT); - if (error != 0) - return (error); - vn_lock(ctx->vp, LK_EXCLUSIVE | LK_RETRY); - error = vn_truncate_locked(ctx->vp, newsz, false, cred); - VOP_UNLOCK(ctx->vp); - vn_finished_write(mp); - return (error); -} - static int core_extend(struct coredump_params *cp, off_t newsz) { diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index e96f72d56e18..da0efac0598d 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -45,7 +45,6 @@ #include <sys/vnode.h> #include <sys/acct.h> #include <sys/capsicum.h> -#include <sys/compressor.h> #include <sys/condvar.h> #include <sys/devctl.h> #include <sys/event.h> @@ -81,6 +80,7 @@ #include <sys/syslog.h> #include <sys/sysproto.h> #include <sys/timers.h> +#include <sys/ucoredump.h> #include <sys/unistd.h> #include <sys/vmmeter.h> #include <sys/wait.h> @@ -102,7 +102,6 @@ SDT_PROBE_DEFINE2(proc, , , signal__clear, SDT_PROBE_DEFINE3(proc, , , signal__discard, "struct thread *", "struct proc *", "int"); -static int coredump(struct thread *); static int killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi); static int issignal(struct thread *td); @@ -127,11 +126,6 @@ const struct filterops sig_filtops = { .f_event = filt_signal, }; -static int kern_logsigexit = 
1; -SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW, - &kern_logsigexit, 0, - "Log processes quitting on abnormal signals to syslog(3)"); - static int kern_forcesigexit = 1; SYSCTL_INT(_kern, OID_AUTO, forcesigexit, CTLFLAG_RW, &kern_forcesigexit, 0, "Force trap signal to be handled"); @@ -194,26 +188,6 @@ SYSINIT(signal, SI_SUB_P1003_1B, SI_ORDER_FIRST+3, sigqueue_start, NULL); (cr1)->cr_ruid == (cr2)->cr_uid || \ (cr1)->cr_uid == (cr2)->cr_uid) -static int sugid_coredump; -SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RWTUN, - &sugid_coredump, 0, "Allow setuid and setgid processes to dump core"); - -static int capmode_coredump; -SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN, - &capmode_coredump, 0, "Allow processes in capability mode to dump core"); - -static int do_coredump = 1; -SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW, - &do_coredump, 0, "Enable/Disable coredumps"); - -static int set_core_nodump_flag = 0; -SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag, - 0, "Enable setting the NODUMP flag on coredump files"); - -static int coredump_devctl = 0; -SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl, - 0, "Generate a devctl notification when processes coredump"); - /* * Signal properties and actions. * The array below categorizes the signals and their default actions @@ -785,6 +759,13 @@ sigprop(int sig) return (0); } +bool +sig_do_core(int sig) +{ + + return ((sigprop(sig) & SIGPROP_CORE) != 0); +} + static bool sigact_flag_test(const struct sigaction *act, int flag) { @@ -3644,82 +3625,6 @@ killproc(struct proc *p, const char *why) kern_psignal(p, SIGKILL); } -/* - * Force the current process to exit with the specified signal, dumping core - * if appropriate. We bypass the normal tests for masked and caught signals, - * allowing unrecoverable failures to terminate the process without changing - * signal state. Mark the accounting record with the signal termination. 
- * If dumping core, save the signal number for the debugger. Calls exit and - * does not return. - */ -void -sigexit(struct thread *td, int sig) -{ - struct proc *p = td->td_proc; - const char *coreinfo; - int rv; - bool logexit; - - PROC_LOCK_ASSERT(p, MA_OWNED); - proc_set_p2_wexit(p); - - p->p_acflag |= AXSIG; - if ((p->p_flag2 & P2_LOGSIGEXIT_CTL) == 0) - logexit = kern_logsigexit != 0; - else - logexit = (p->p_flag2 & P2_LOGSIGEXIT_ENABLE) != 0; - - /* - * We must be single-threading to generate a core dump. This - * ensures that the registers in the core file are up-to-date. - * Also, the ELF dump handler assumes that the thread list doesn't - * change out from under it. - * - * XXX If another thread attempts to single-thread before us - * (e.g. via fork()), we won't get a dump at all. - */ - if ((sigprop(sig) & SIGPROP_CORE) && - thread_single(p, SINGLE_NO_EXIT) == 0) { - p->p_sig = sig; - /* - * Log signals which would cause core dumps - * (Log as LOG_INFO to appease those who don't want - * these messages.) - * XXX : Todo, as well as euid, write out ruid too - * Note that coredump() drops proc lock. - */ - rv = coredump(td); - switch (rv) { - case 0: - sig |= WCOREFLAG; - coreinfo = " (core dumped)"; - break; - case EFAULT: - coreinfo = " (no core dump - bad address)"; - break; - case EINVAL: - coreinfo = " (no core dump - invalid argument)"; - break; - case EFBIG: - coreinfo = " (no core dump - too large)"; - break; - default: - coreinfo = " (no core dump - other error)"; - break; - } - if (logexit) - log(LOG_INFO, - "pid %d (%s), jid %d, uid %d: exited on " - "signal %d%s\n", p->p_pid, p->p_comm, - p->p_ucred->cr_prison->pr_id, - td->td_ucred->cr_uid, - sig &~ WCOREFLAG, coreinfo); - } else - PROC_UNLOCK(p); - exit1(td, 0, sig); - /* NOTREACHED */ -} - /* * Send queued SIGCHLD to parent when child process's state * is changed. 
@@ -3813,486 +3718,6 @@ childproc_exited(struct proc *p) sigparent(p, reason, status); } -#define MAX_NUM_CORE_FILES 100000 -#ifndef NUM_CORE_FILES -#define NUM_CORE_FILES 5 -#endif -CTASSERT(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES); -static int num_cores = NUM_CORE_FILES; - -static int -sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS) -{ - int error; - int new_val; - - new_val = num_cores; - error = sysctl_handle_int(oidp, &new_val, 0, req); - if (error != 0 || req->newptr == NULL) - return (error); - if (new_val > MAX_NUM_CORE_FILES) - new_val = MAX_NUM_CORE_FILES; - if (new_val < 0) - new_val = 0; - num_cores = new_val; - return (0); -} -SYSCTL_PROC(_debug, OID_AUTO, ncores, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(int), - sysctl_debug_num_cores_check, "I", - "Maximum number of generated process corefiles while using index format"); - -#define GZIP_SUFFIX ".gz" -#define ZSTD_SUFFIX ".zst" - -int compress_user_cores = 0; - -static int -sysctl_compress_user_cores(SYSCTL_HANDLER_ARGS) -{ - int error, val; - - val = compress_user_cores; - error = sysctl_handle_int(oidp, &val, 0, req); - if (error != 0 || req->newptr == NULL) - return (error); - if (val != 0 && !compressor_avail(val)) - return (EINVAL); - compress_user_cores = val; - return (error); -} -SYSCTL_PROC(_kern, OID_AUTO, compress_user_cores, - CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, 0, sizeof(int), - sysctl_compress_user_cores, "I", - "Enable compression of user corefiles (" - __XSTRING(COMPRESS_GZIP) " = gzip, " - __XSTRING(COMPRESS_ZSTD) " = zstd)"); - -int compress_user_cores_level = 6; -SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_level, CTLFLAG_RWTUN, - &compress_user_cores_level, 0, - "Corefile compression level"); - -/* - * Protect the access to corefilename[] by allproc_lock. 
- */ -#define corefilename_lock allproc_lock - -static char corefilename[MAXPATHLEN] = {"%N.core"}; -TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename)); - -static int -sysctl_kern_corefile(SYSCTL_HANDLER_ARGS) -{ - int error; - - sx_xlock(&corefilename_lock); - error = sysctl_handle_string(oidp, corefilename, sizeof(corefilename), - req); - sx_xunlock(&corefilename_lock); - - return (error); -} -SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW | - CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A", - "Process corefile name format string"); - -static void -vnode_close_locked(struct thread *td, struct vnode *vp) -{ - - VOP_UNLOCK(vp); - vn_close(vp, FWRITE, td->td_ucred, td); -} - -/* *** 767 LINES SKIPPED ***