The branch main has been updated by kevans:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=ce51f79913aa28a32217a424845a2649019535af

commit ce51f79913aa28a32217a424845a2649019535af
Author:     Kyle Evans <kev...@freebsd.org>
AuthorDate: 2025-07-15 21:38:30 +0000
Commit:     Kyle Evans <kev...@freebsd.org>
CommitDate: 2025-07-26 21:31:41 +0000

    kern: abstract away the vnode coredumper to allow pluggable dumpers
    
    The default and only stock coredumper will continue to be the
    traditional vnode dumper, which will dump to a vnode and issue a devctl
    notification. With this change, one can write a kmod that injects custom
    handling of user coredumps that offers richer behavior, particularly in
    case one wants to add more metadata than we can tap out via devd.
    
    The main motivation here is to pave the way for my usercore daemon to be
    able to reroute coredumps before they ever touch the disk.  In some
    cases they may be discarded and we can avoid the overhead of writing
    anything, in others they allow us to capture coredumps that would be
    written into an area that's transient in nature (e.g., kyua test work
    directories) without having to do more tricks to keep those alive.  My
    WIP kmod writes the coredump into a shmfd instead of a vnode, then
    installs that into ucored(8) with every read(2) of /dev/ucore.  This
    also allows me to capture more metadata reliably before the process and
    jail disappear.
    
    Reviewed by:    kib (earlier version), markj
    Differential Revision:  https://reviews.freebsd.org/D51338
---
 share/man/man5/core.5                |  40 ++++-----
 share/man/man9/Makefile              |   2 +
 share/man/man9/coredumper_register.9 | 168 +++++++++++++++++++++++++++++++++++
 sys/kern/coredump_vnode.c            |  11 ++-
 sys/kern/kern_exec.c                 |   2 +-
 sys/kern/kern_ucoredump.c            |  80 ++++++++++++++++-
 sys/sys/ucoredump.h                  |  33 ++++++-
 7 files changed, 311 insertions(+), 25 deletions(-)

diff --git a/share/man/man5/core.5 b/share/man/man5/core.5
index aa6e3c67097d..628fdb7920bb 100644
--- a/share/man/man5/core.5
+++ b/share/man/man5/core.5
@@ -48,26 +48,6 @@ a system crash.
 (In this event, the decision to save the core file is arbitrary, see
 .Xr savecore 8 . )
 .Pp
-The maximum size of a core file is limited by the
-.Dv RLIMIT_CORE
-.Xr setrlimit 2
-limit.
-Files which would be larger than the limit are not created.
-.Pp
-With a large limit, a process that had mapped a very large,
-and perhaps sparsely populated, virtual memory region, could take
-a very long time to create core dumps.
-The system ignores all signals sent to a process writing a core file, except
-.Dv SIGKILL
-which terminates the writing and causes immediate exit of the process.
-The behavior of
-.Dv SIGKILL
-can be disabled by setting tunable
-.Xr sysctl 8
-variable
-.Va kern.core_dump_can_intr
-to zero.
-.Pp
 The name of the file is controlled via the
 .Xr sysctl 8
 variable
@@ -107,6 +87,26 @@ yielding the traditional
 .Fx
 behaviour.
 .Pp
+The maximum size of a core file is limited by the
+.Dv RLIMIT_CORE
+.Xr setrlimit 2
+limit.
+Files which would be larger than the limit are not created.
+.Pp
+With a large limit, a process that had mapped a very large,
+and perhaps sparsely populated, virtual memory region, could take
+a very long time to create core dumps.
+The system ignores all signals sent to a process writing a core file, except
+.Dv SIGKILL
+which terminates the writing and causes immediate exit of the process.
+The behavior of
+.Dv SIGKILL
+can be disabled by setting tunable
+.Xr sysctl 8
+variable
+.Va kern.core_dump_can_intr
+to zero.
+.Pp
 By default, a process that changes user or group credentials whether
 real or effective will not create a corefile.
 This behaviour can be
diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index b73e47b3ef4d..5bcde3030ebc 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -69,6 +69,7 @@ MAN=  accept_filter.9 \
        config_intrhook.9 \
        contigmalloc.9 \
        copy.9 \
+       coredumper_register.9 \
        counter.9 \
        cpu_machdep.9 \
        cpuset.9 \
@@ -905,6 +906,7 @@ MLINKS+=copy.9 copyin.9 \
        copy.9 copyout.9 \
        copy.9 copyout_nofault.9 \
        copy.9 copystr.9
+MLINKS+=coredumper_register.9 coredumper_unregister.9
 MLINKS+=counter.9 counter_u64_alloc.9 \
        counter.9 counter_u64_free.9 \
        counter.9 counter_u64_add.9 \
diff --git a/share/man/man9/coredumper_register.9 b/share/man/man9/coredumper_register.9
new file mode 100644
index 000000000000..44f94166f378
--- /dev/null
+++ b/share/man/man9/coredumper_register.9
@@ -0,0 +1,168 @@
+.\"
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\"
+.\" Copyright (c) 2025 Kyle Evans <kev...@freebsd.org>
+.\"
+.Dd July 23, 2025
+.Dt COREDUMPER_REGISTER 9
+.Os
+.Sh NAME
+.Nm coredumper_register ,
+.Nm coredumper_unregister
+.Nd loadable user coredumper support
+.Sh SYNOPSIS
+.In sys/ucoredump.h
+.Ft void
+.Fn coredumper_register "struct coredumper *cd"
+.Ft void
+.Fn coredumper_unregister "struct coredumper *cd"
+.Pp
+.Ft int
+.Fn coredumper_probe_fn "struct thread *td"
+.Ft int
+.Fn coredumper_handle_fn "struct thread *td" "off_t limit"
+.Bd -literal
+/* Incomplete, but the useful members are depicted here. */
+struct coredumper {
+       const char              *cd_name;
+       coredumper_probe_fn     *cd_probe;
+       coredumper_handle_fn    *cd_handle;
+};
+.Ed
+.Pp
+.Ft int
+.Fn coredump_init_fn "const struct coredump_writer *" "const struct coredump_params *"
+"int"
+.Ft int
+.Fn coredump_write_fn "const struct coredump_writer *" "const void *" "size_t" \
+"off_t" "enum uio_seg" "struct ucred *" "size_t *" "struct thread *"
+.Ft int
+.Fn coredump_extend_fn "const struct coredump_writer *" "off_t" "struct ucred *"
+.Bd -literal
+struct coredump_writer {
+       void                    *ctx;
+       coredump_init_fn        *init_fn;
+       coredump_write_fn       *write_fn;
+       coredump_extend_fn      *extend_fn;
+};
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+mechanism provides a path for kernel modules to register a new user process core
+dumper.
+The expected use of
+.Nm
+is for a module to define the fields of the struct coredumper listed above, then
+call
+.Fn coredumper_register
+at
+.Dv MOD_LOAD
+time.
+A corresponding
+.Fn coredumper_unregister
+should be called at
+.Dv MOD_UNLOAD
+time.
+Note that
+.Fn coredumper_unregister
+will block until the specified coredumper is no longer processing coredumps.
+.Pp
+When a user process is preparing to start dumping core, the kernel will execute
+the
+.Fn cd_probe
+function for each coredumper currently registered.
+The
+.Fn cd_probe
+function is expected to return either -1 if it would decline to dump the
+process, or a priority level greater than or equal to 0.
+The coredumper with the highest priority will handle the coredump.
+The following default priorities are defined:
+.Bl -tag -width indent
+.It Dv COREDUMPER_NOMATCH
+This dumper declines dumping the process.
+.It Dv COREDUMPER_GENERIC
+This dumper will dump the process at the lowest priority.
+This priority is not recommended, as the default vnode dumper will bid at
+.Dv COREDUMPER_GENERIC
+as well.
+.It Dv COREDUMPER_SPECIAL
+This dumper provides special behavior, and will dump the process at a higher
+priority.
+.It Dv COREDUMPER_HIGH_PRIORITY
+This dumper would prefer to handle this coredump.
+This may be used by, for instance, a custom or vendor-specific coredump
+mechanism that wishes to preempt others.
+.El
+.Pp
+Note that this system has been designed such that the
+.Fn cd_probe
+function can examine the process in question and make an informed decision.
+Different processes being dumped could probe at different priorities in the
+same coredumper.
+.Pp
+Once the highest priority coredumper has been selected, the
+.Fn cd_handle
+function will be invoked.
+The
+.Fn cd_handle
+will receive both the thread and the
+.Dv RLIMIT_CORE
+.Xr setrlimit 2
+.Fa limit .
+The proc lock will be held on entry, and should be unlocked before the handler
+returns.
+The
+.Fa limit
+is typically passed to the
+.Fn sv_coredump
+that belongs to the process's
+.Va p_sysent .
+.Pp
+The
+.Fn cd_handle
+function should return either 0 if the dump was successful, or an appropriate
+.Xr errno 2
+otherwise.
+.Ss Customized Coredump Writers
+Custom coredumpers can define their own
+.Dv coredump_writer
+to pass to
+.Fn sv_coredump .
+.Pp
+The
+.Va ctx
+member is opaque and only to be used by the coredumper itself.
+.Pp
+The
+.Va init_fn
+function, if it's provided, will be called by the
+.Fn sv_coredump
+implementation before any data is to be written.
+This allows the writer implementation to record any coredump parameters that it
+might need to capture, or setup the object to be written to.
+.Pp
+The
+.Va write_fn
+function will be called by the
+.Fn sv_coredump
+implementation to write out data.
+The
+.Va extend_fn
+function will be called to enlarge the coredump, in the sense that a hole is
+created in any difference between the current size and the new size.
+For convenience, the
+.Fn core_vn_write
+and
+.Fn core_vn_extend
+functions used by the vnode coredumper are exposed in
+.In sys/ucoredump.h ,
+and the
+.Dv coredump_vnode_ctx
+defined there should be populated with the vnode to write to.
+.Sh SEE ALSO
+.Xr setrlimit 2 ,
+.Xr core 5
+.Sh AUTHORS
+This manual page was written by
+.An Kyle Evans Aq Mt kev...@freebsd.org .
diff --git a/sys/kern/coredump_vnode.c b/sys/kern/coredump_vnode.c
index 675503476a4e..8b857e9aa4a2 100644
--- a/sys/kern/coredump_vnode.c
+++ b/sys/kern/coredump_vnode.c
@@ -89,6 +89,15 @@
 #define        NUM_CORE_FILES 5
 #endif
 
+static coredumper_handle_fn    coredump_vnode;
+static struct coredumper vnode_coredumper = {
+       .cd_name = "vnode_coredumper",
+       .cd_handle = coredump_vnode,    /* no cd_probe: GENERIC fallback */
+};
+
+SYSINIT(vnode_coredumper_register, SI_SUB_EXEC, SI_ORDER_ANY,
+    coredumper_register, &vnode_coredumper);
+
 _Static_assert(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES,
     "NUM_CORE_FILES is out of range (0 to " __STRING(MAX_NUM_CORE_FILES) ")");
 static int num_cores = NUM_CORE_FILES;
@@ -420,7 +429,7 @@ corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td,
  * one.  If there _is not_ one, it returns ENOSYS; otherwise it returns the
  * error from the process-specific routine.
  */
-int
+static int
 coredump_vnode(struct thread *td, off_t limit)
 {
        struct proc *p = td->td_proc;
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 5cd4d39d7236..0fc2d0e7f1bc 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -2010,7 +2010,7 @@ core_write(struct coredump_params *cp, const void *base, size_t len,
 static int
 core_extend(struct coredump_params *cp, off_t newsz)
 {
-       return ((*cp->cdw->extend_fn)(cp->cdw, newsz, cp->td->td_ucred));
+       return ((*cp->cdw->extend_fn)(cp->cdw, newsz, cp->active_cred));
 }
 
 int
diff --git a/sys/kern/kern_ucoredump.c b/sys/kern/kern_ucoredump.c
index 4192928be614..a2412bf06441 100644
--- a/sys/kern/kern_ucoredump.c
+++ b/sys/kern/kern_ucoredump.c
@@ -38,12 +38,14 @@
 #include <sys/acct.h>
 #include <sys/compressor.h>
 #include <sys/jail.h>
+#include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/signalvar.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
+#include <sys/rmlock.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/ucoredump.h>
@@ -53,6 +55,11 @@ static int coredump(struct thread *td);
 
 int compress_user_cores = 0;
 
+static SLIST_HEAD(, coredumper)        coredumpers =
+    SLIST_HEAD_INITIALIZER(coredumpers);
+static struct rmlock   coredump_rmlock;
+RM_SYSINIT(coredump_lock, &coredump_rmlock, "coredump_lock");
+
 static int kern_logsigexit = 1;
 SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW,
     &kern_logsigexit, 0,
@@ -92,6 +99,30 @@ SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_level, CTLFLAG_RWTUN,
     &compress_user_cores_level, 0,
     "Corefile compression level");
 
+void
+coredumper_register(struct coredumper *cd)
+{
+       /* Set up the refcount before cd becomes visible on the list. */
+       blockcount_init(&cd->cd_refcount);
+       rm_wlock(&coredump_rmlock);
+       SLIST_INSERT_HEAD(&coredumpers, cd, cd_entry);
+       rm_wunlock(&coredump_rmlock);
+}
+
+void
+coredumper_unregister(struct coredumper *cd)
+{
+       /* Unlink under the write lock so coredump() can no longer pick cd. */
+       rm_wlock(&coredump_rmlock);
+       SLIST_REMOVE(&coredumpers, cd, coredumper, cd_entry);
+       rm_wunlock(&coredump_rmlock);
+
+       /*
+        * Wait for any in-progress coredumps to finish before returning.
+        */
+       blockcount_wait(&cd->cd_refcount, NULL, "dumpwait", 0);
+}
+
 /*
  * Force the current process to exit with the specified signal, dumping core
  * if appropriate.  We bypass the normal tests for masked and caught signals,
@@ -178,9 +209,11 @@ sigexit(struct thread *td, int sig)
 static int
 coredump(struct thread *td)
 {
+       struct coredumper *iter, *chosen;
        struct proc *p = td->td_proc;
+       struct rm_priotracker tracker;
        off_t limit;
-       int error;
+       int error, priority;
 
        PROC_LOCK_ASSERT(p, MA_OWNED);
        MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td);
@@ -205,8 +238,51 @@ coredump(struct thread *td)
                return (EFBIG);
        }
 
-       error = coredump_vnode(td, limit);
+       rm_rlock(&coredump_rmlock, &tracker);
+       priority = -1;
+       chosen = NULL;
+       SLIST_FOREACH(iter, &coredumpers, cd_entry) {
+               if (iter->cd_probe == NULL) {
+                       /*
+                        * If we haven't found anything of a higher priority
+                        * yet, we'll call this a GENERIC.  Ideally, we want
+                        * coredumper modules to include a probe function.
+                        */
+                       if (priority < 0) {
+                               priority = COREDUMPER_GENERIC;
+                               chosen = iter;
+                       }
+
+                       continue;
+               }
+
+               error = (*iter->cd_probe)(td);
+               if (error < 0)
+                       continue;
+
+               /*
+                * Higher priority than previous options.
+                */
+               if (error > priority) {
+                       priority = error;
+                       chosen = iter;
+               }
+       }
+
+       /*
+        * Currently, we always have the vnode dumper built in; assert that
+        * before chosen is dereferenced.  Take our refcount before dropping
+        * the lock so that coredumper_unregister() can safely assume that the
+        * refcount will only go down once it's dropped the rmlock.
+        */
+       MPASS(chosen != NULL);
+       blockcount_acquire(&chosen->cd_refcount, 1);
+       rm_runlock(&coredump_rmlock, &tracker);
+
+       error = ((*chosen->cd_handle)(td, limit));
        PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 
+       blockcount_release(&chosen->cd_refcount, 1);
+
        return (error);
 }
diff --git a/sys/sys/ucoredump.h b/sys/sys/ucoredump.h
index b543c130b9dc..0a51ee7f50c8 100644
--- a/sys/sys/ucoredump.h
+++ b/sys/sys/ucoredump.h
@@ -13,6 +13,8 @@
 #ifdef _KERNEL
 
 #include <sys/_uio.h>
+#include <sys/blockcount.h>
+#include <sys/queue.h>
 
 /* Coredump output parameters. */
 struct coredump_params;
@@ -34,7 +36,6 @@ struct coredump_vnode_ctx {
 
 coredump_write_fn core_vn_write;
 coredump_extend_fn core_vn_extend;
-int coredump_vnode(struct thread *, off_t);
 
 struct coredump_writer {
        void                    *ctx;
@@ -64,5 +65,35 @@ extern int coredump_pack_vmmapinfo;
 extern int compress_user_cores;
 extern int compress_user_cores_level;
 
+typedef int coredumper_probe_fn(struct thread *);
+
+/*
+ * Some arbitrary values for coredumper probes to return.  The highest priority
+ * we can find wins.  It's somewhat expected that a coredumper may want to bid
+ * differently based on the process in question.  Note that probe functions will
+ * be called with the proc lock held, so they must not sleep.
+ */
+#define        COREDUMPER_NOMATCH              (-1)    /* Decline to touch it */
+#define        COREDUMPER_GENERIC              (0)     /* I handle coredumps */
+#define        COREDUMPER_SPECIAL              (50)    /* Special handler */
+#define        COREDUMPER_HIGH_PRIORITY        (100)   /* High-priority handler */
+
+/*
+ * The handle functions will be called with the proc lock held, and should
+ * return with the proc lock dropped.
+ */
+typedef int coredumper_handle_fn(struct thread *, off_t);
+
+struct coredumper {
+       SLIST_ENTRY(coredumper)  cd_entry;      /* coredumpers list linkage */
+       const char              *cd_name;
+       coredumper_probe_fn     *cd_probe;      /* NULL is treated as GENERIC */
+       coredumper_handle_fn    *cd_handle;
+       blockcount_t             cd_refcount;   /* dumps in progress */
+};
+
+void coredumper_register(struct coredumper *);
+void coredumper_unregister(struct coredumper *);
+
 #endif /* _KERNEL */
 #endif /* _SYS_UCOREDUMP_H_ */

Reply via email to