It has been asked in the past whether OpenBSD's kevent(2) should implement user event filters, also known as EVFILT_USER. This filter type originates from FreeBSD but is now also available on DragonFly BSD, NetBSD, and macOS.
Below is an implementation of EVFILT_USER. The logic should be fairly straightforward. However, the filter type needs a special case in kqueue_register() to allow triggering a previously registered user event without using EV_ADD. The code limits the number of user events. Otherwise the user could allocate copious amounts of kernel memory. The limit is per process so that programs will not interfere with each other. The current limit is arbitrary and might need adjusting later. Hopefully a sysctl knob will not be necessary. I am in two minds about EVFILT_USER. On the one hand, having it on OpenBSD might help with ports. On the other hand, it makes the kernel perform a task that userspace can already handle using existing interfaces. Index: lib/libc/sys/kqueue.2 =================================================================== RCS file: src/lib/libc/sys/kqueue.2,v retrieving revision 1.46 diff -u -p -r1.46 kqueue.2 --- lib/libc/sys/kqueue.2 31 Mar 2022 17:27:16 -0000 1.46 +++ lib/libc/sys/kqueue.2 30 Apr 2022 13:33:10 -0000 @@ -487,6 +487,44 @@ A device change event has occurred, e.g. On return, .Fa fflags contains the events which triggered the filter. +.It Dv EVFILT_USER +Establishes a user event identified by +.Va ident +which is not associated with any kernel mechanism but is triggered by +user level code. +The lower 24 bits of the +.Va fflags +may be used for user defined flags and manipulated using the following: +.Bl -tag -width XXNOTE_FFLAGSMASK +.It Dv NOTE_FFNOP +Ignore the input +.Va fflags . +.It Dv NOTE_FFAND +Bitwise AND +.Va fflags . +.It Dv NOTE_FFOR +Bitwise OR +.Va fflags . +.It Dv NOTE_FFCOPY +Copy +.Va fflags . +.It Dv NOTE_FFCTRLMASK +Control mask for +.Va fflags . +.It Dv NOTE_FFLAGSMASK +User defined flag mask for +.Va fflags . +.El +.Pp +A user event is triggered for output with the following: +.Bl -tag -width XXNOTE_FFLAGSMASK +.It Dv NOTE_TRIGGER +Cause the event to be triggered. 
+.El +.Pp +On return, +.Va fflags +contains the user defined flags in the lower 24 bits. .El .Sh RETURN VALUES .Fn kqueue Index: regress/sys/kern/kqueue/Makefile =================================================================== RCS file: src/regress/sys/kern/kqueue/Makefile,v retrieving revision 1.31 diff -u -p -r1.31 Makefile --- regress/sys/kern/kqueue/Makefile 30 Mar 2022 05:11:52 -0000 1.31 +++ regress/sys/kern/kqueue/Makefile 30 Apr 2022 13:33:12 -0000 @@ -4,7 +4,7 @@ PROG= kqueue-test CFLAGS+=-Wall SRCS= kqueue-pipe.c kqueue-fork.c main.c kqueue-process.c kqueue-random.c \ kqueue-pty.c kqueue-tun.c kqueue-signal.c kqueue-fdpass.c \ - kqueue-flock.c kqueue-timer.c kqueue-regress.c + kqueue-flock.c kqueue-timer.c kqueue-regress.c kqueue-user.c LDADD= -levent -lutil DPADD= ${LIBEVENT} ${LIBUTIL} @@ -50,6 +50,8 @@ kq-regress-5: ${PROG} ./${PROG} -R5 kq-regress-6: ${PROG} ./${PROG} -R6 +kq-user: ${PROG} + ./${PROG} -u TESTS+= kq-fdpass TESTS+= kq-flock @@ -70,6 +72,7 @@ TESTS+= kq-reset-timer TESTS+= kq-signal TESTS+= kq-timer TESTS+= kq-tun +TESTS+= kq-user REGRESS_TARGETS=${TESTS} REGRESS_ROOT_TARGETS=kq-pty-1 Index: regress/sys/kern/kqueue/kqueue-user.c =================================================================== RCS file: regress/sys/kern/kqueue/kqueue-user.c diff -N regress/sys/kern/kqueue/kqueue-user.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ regress/sys/kern/kqueue/kqueue-user.c 30 Apr 2022 13:33:12 -0000 @@ -0,0 +1,189 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 Visa Hankala + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/event.h> + +#include <err.h> +#include <errno.h> +#include <unistd.h> + +#include "main.h" + +int +do_user(void) +{ + const struct timespec ts = { 0, 10000 }; + struct kevent kev[2]; + int dummy, dummy2, i, kq, n; + + ASS((kq = kqueue()) >= 0, + warn("kqueue")); + + /* Set up an event. */ + EV_SET(&kev[0], 1, EVFILT_USER, EV_ADD, ~0U & ~NOTE_TRIGGER, 0, NULL); + ASS(kevent(kq, kev, 1, NULL, 0, NULL) == 0, + warn("kevent")); + + n = kevent(kq, NULL, 0, kev, 2, &ts); + ASSX(n == 0); + + /* + * Activate the event. + * Fields `data' and `udata' do not get updated without EV_ADD. + */ + EV_SET(&kev[0], 1, EVFILT_USER, 0, NOTE_TRIGGER | NOTE_FFNOP, + 123, &dummy); + n = kevent(kq, kev, 1, NULL, 0, NULL); + ASSX(n == 0); + + /* Check active events. */ + n = kevent(kq, NULL, 0, kev, 2, &ts); + ASSX(n == 1); + ASSX(kev[0].ident == 1); + ASSX(kev[0].fflags == NOTE_FFLAGSMASK); + ASSX(kev[0].data == 0); + ASSX(kev[0].udata == NULL); + + /* Activate the event. Update `data' and `udata'. */ + EV_SET(&kev[0], 1, EVFILT_USER, EV_ADD, NOTE_TRIGGER | NOTE_FFNOP, + 123, &dummy); + n = kevent(kq, kev, 1, NULL, 0, NULL); + ASSX(n == 0); + + /* Check active events. */ + n = kevent(kq, NULL, 0, kev, 2, &ts); + ASSX(n == 1); + ASSX(kev[0].ident == 1); + ASSX(kev[0].fflags == NOTE_FFLAGSMASK); + ASSX(kev[0].data == 123); + ASSX(kev[0].udata == &dummy); + + /* Set up another event. */ + EV_SET(&kev[0], 2, EVFILT_USER, EV_ADD, NOTE_TRIGGER, 654, &dummy2); + n = kevent(kq, kev, 1, NULL, 0, NULL); + ASSX(n == 0); + + /* Check active events. This assumes a specific output order. 
*/ + n = kevent(kq, NULL, 0, kev, 2, &ts); + ASSX(n == 2); + ASSX(kev[0].ident == 1); + ASSX(kev[0].fflags == NOTE_FFLAGSMASK); + ASSX(kev[0].data == 123); + ASSX(kev[0].udata == &dummy); + ASSX(kev[1].ident == 2); + ASSX(kev[1].fflags == 0); + ASSX(kev[1].data == 654); + ASSX(kev[1].udata == &dummy2); + + /* Clear the first event. */ + EV_SET(&kev[0], 1, EVFILT_USER, EV_CLEAR, 0, 0, NULL); + n = kevent(kq, kev, 1, NULL, 0, NULL); + ASSX(n == 0); + + n = kevent(kq, NULL, 0, kev, 2, &ts); + ASSX(n == 1); + ASSX(kev[0].ident == 2); + ASSX(kev[0].fflags == 0); + ASSX(kev[0].data == 654); + ASSX(kev[0].udata == &dummy2); + + /* Delete the second event. */ + EV_SET(&kev[0], 2, EVFILT_USER, EV_DELETE, 0, 0, NULL); + n = kevent(kq, kev, 1, NULL, 0, NULL); + ASSX(n == 0); + + n = kevent(kq, NULL, 0, kev, 2, &ts); + ASSX(n == 0); + + /* Test self-clearing event. */ + EV_SET(&kev[0], 2, EVFILT_USER, EV_ADD | EV_CLEAR, 0x11, 42, &dummy); + n = kevent(kq, kev, 1, kev, 2, &ts); + ASSX(n == 0); + + EV_SET(&kev[0], 2, EVFILT_USER, 0, NOTE_TRIGGER | 0x3, 24, &dummy2); + n = kevent(kq, kev, 1, kev, 2, &ts); + ASSX(n == 1); + ASSX(kev[0].ident == 2); + ASSX(kev[0].fflags == 0x11); + ASSX(kev[0].data == 42); + ASSX(kev[0].udata == &dummy); + + n = kevent(kq, NULL, 0, kev, 2, &ts); + ASSX(n == 0); + + EV_SET(&kev[0], 2, EVFILT_USER, 0, NOTE_TRIGGER | 0x3, 9, &dummy2); + n = kevent(kq, kev, 1, kev, 2, &ts); + ASSX(n == 1); + ASSX(kev[0].ident == 2); + ASSX(kev[0].fflags == 0); + ASSX(kev[0].data == 0); + ASSX(kev[0].udata == &dummy); + + EV_SET(&kev[0], 2, EVFILT_USER, EV_DELETE, 0, 0, NULL); + n = kevent(kq, kev, 1, kev, 2, &ts); + ASSX(n == 0); + + /* Change fflags. 
*/ + EV_SET(&kev[0], 1, EVFILT_USER, 0, NOTE_FFCOPY | 0x00aa00, 0, NULL); + n = kevent(kq, kev, 1, kev, 2, &ts); + ASSX(n == 0); + EV_SET(&kev[0], 1, EVFILT_USER, 0, NOTE_FFOR | 0xff00ff, 0, NULL); + n = kevent(kq, kev, 1, kev, 2, &ts); + ASSX(n == 0); + EV_SET(&kev[0], 1, EVFILT_USER, 0, NOTE_TRIGGER | NOTE_FFAND | 0x0ffff0, + 0, NULL); + n = kevent(kq, kev, 1, kev, 2, &ts); + ASSX(n == 1); + ASSX(kev[0].ident == 1); + ASSX(kev[0].fflags == 0x0faaf0); + ASSX(kev[0].data == 0); + ASSX(kev[0].udata == &dummy); + + /* Test event limit. */ + for (i = 0;; i++) { + EV_SET(&kev[0], i, EVFILT_USER, EV_ADD, 0, 0, NULL); + n = kevent(kq, kev, 1, NULL, 0, NULL); + if (n == -1) { + ASSX(errno == ENOMEM); + break; + } + ASSX(n == 0); + } + ASSX(i < 1000000); + + /* Delete one event, ... */ + EV_SET(&kev[0], 0, EVFILT_USER, EV_DELETE, 0, 0, NULL); + n = kevent(kq, kev, 1, NULL, 0, NULL); + ASSX(n == 0); + + /* ... after which adding should succeed. */ + EV_SET(&kev[0], 0, EVFILT_USER, EV_ADD, 0, 0, NULL); + n = kevent(kq, kev, 1, NULL, 0, NULL); + ASSX(n == 0); + + EV_SET(&kev[0], i, EVFILT_USER, EV_ADD, 0, 0, NULL); + n = kevent(kq, kev, 1, NULL, 0, NULL); + ASSX(n == -1); + ASSX(errno == ENOMEM); + + close(kq); + + return (0); +} Index: regress/sys/kern/kqueue/main.c =================================================================== RCS file: src/regress/sys/kern/kqueue/main.c,v retrieving revision 1.15 diff -u -p -r1.15 main.c --- regress/sys/kern/kqueue/main.c 12 Jun 2021 13:30:14 -0000 1.15 +++ regress/sys/kern/kqueue/main.c 30 Apr 2022 13:33:12 -0000 @@ -17,7 +17,7 @@ main(int argc, char **argv) int n, ret, c; ret = 0; - while ((c = getopt(argc, argv, "fFiIjlpPrR:stT:")) != -1) { + while ((c = getopt(argc, argv, "fFiIjlpPrR:stT:u")) != -1) { switch (c) { case 'f': ret |= check_inheritance(); @@ -60,8 +60,11 @@ main(int argc, char **argv) n = strtonum(optarg, 1, INT_MAX, NULL); ret |= do_pty(n); break; + case 'u': + ret |= do_user(); + break; default: - fprintf(stderr, 
"usage: %s -[fFiIlpPrstT] [-R n]\n", + fprintf(stderr, "usage: %s -[fFiIlpPrstTu] [-R n]\n", __progname); exit(1); } Index: regress/sys/kern/kqueue/main.h =================================================================== RCS file: src/regress/sys/kern/kqueue/main.h,v retrieving revision 1.6 diff -u -p -r1.6 main.h --- regress/sys/kern/kqueue/main.h 12 Jun 2021 13:30:14 -0000 1.6 +++ regress/sys/kern/kqueue/main.h 30 Apr 2022 13:33:12 -0000 @@ -28,3 +28,4 @@ int do_reset_timer(void); int do_signal(void); int do_timer(void); int do_tun(void); +int do_user(void); Index: sys/kern/kern_descrip.c =================================================================== RCS file: src/sys/kern/kern_descrip.c,v retrieving revision 1.205 diff -u -p -r1.205 kern_descrip.c --- sys/kern/kern_descrip.c 20 Jan 2022 11:06:57 -0000 1.205 +++ sys/kern/kern_descrip.c 30 Apr 2022 13:33:09 -0000 @@ -39,6 +39,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/atomic.h> #include <sys/filedesc.h> #include <sys/kernel.h> #include <sys/vnode.h> @@ -1215,6 +1216,7 @@ fdfree(struct proc *p) vrele(fdp->fd_cdir); if (fdp->fd_rdir) vrele(fdp->fd_rdir); + KASSERT(atomic_load_int(&fdp->fd_nuserevents) == 0); pool_put(&fdesc_pool, fdp); } Index: sys/kern/kern_event.c =================================================================== RCS file: src/sys/kern/kern_event.c,v retrieving revision 1.186 diff -u -p -r1.186 kern_event.c --- sys/kern/kern_event.c 31 Mar 2022 01:41:22 -0000 1.186 +++ sys/kern/kern_event.c 30 Apr 2022 13:33:09 -0000 @@ -30,6 +30,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/atomic.h> #include <sys/kernel.h> #include <sys/proc.h> #include <sys/pledge.h> @@ -138,6 +139,10 @@ int filt_timerattach(struct knote *kn); void filt_timerdetach(struct knote *kn); int filt_timermodify(struct kevent *kev, struct knote *kn); int filt_timerprocess(struct knote *kn, struct kevent *kev); +int filt_userattach(struct knote *kn); +void filt_userdetach(struct knote 
*kn); +int filt_usermodify(struct kevent *kev, struct knote *kn); +int filt_userprocess(struct knote *kn, struct kevent *kev); void filt_seltruedetach(struct knote *kn); const struct filterops kqread_filtops = { @@ -172,11 +177,21 @@ const struct filterops timer_filtops = { .f_process = filt_timerprocess, }; +const struct filterops user_filtops = { + .f_flags = FILTEROP_MPSAFE, + .f_attach = filt_userattach, + .f_detach = filt_userdetach, + .f_event = NULL, + .f_modify = filt_usermodify, + .f_process = filt_userprocess, +}; + struct pool knote_pool; struct pool kqueue_pool; struct mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR); int kq_ntimeouts = 0; int kq_timeoutmax = (4 * 1024); +unsigned int kq_usereventsmax = 1024; /* per process */ #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) @@ -193,6 +208,7 @@ const struct filterops *const sysfilt_op &timer_filtops, /* EVFILT_TIMER */ &file_filtops, /* EVFILT_DEVICE */ &file_filtops, /* EVFILT_EXCEPT */ + &user_filtops, /* EVFILT_USER */ }; void @@ -561,6 +577,91 @@ filt_timerprocess(struct knote *kn, stru return (active); } +int +filt_userattach(struct knote *kn) +{ + struct filedesc *fdp = kn->kn_kq->kq_fdp; + u_int nuserevents; + + nuserevents = atomic_inc_int_nv(&fdp->fd_nuserevents); + if (nuserevents > atomic_load_int(&kq_usereventsmax)) { + atomic_dec_int(&fdp->fd_nuserevents); + return (ENOMEM); + } + + kn->kn_ptr.p_useract = ((kn->kn_sfflags & NOTE_TRIGGER) != 0); + kn->kn_fflags = kn->kn_sfflags & NOTE_FFLAGSMASK; + kn->kn_data = kn->kn_sdata; + + return (0); +} + +void +filt_userdetach(struct knote *kn) +{ + struct filedesc *fdp = kn->kn_kq->kq_fdp; + + atomic_dec_int(&fdp->fd_nuserevents); +} + +int +filt_usermodify(struct kevent *kev, struct knote *kn) +{ + unsigned int ffctrl, fflags; + + if (kev->fflags & NOTE_TRIGGER) + kn->kn_ptr.p_useract = 1; + + ffctrl = kev->fflags & NOTE_FFCTRLMASK; + fflags = kev->fflags & NOTE_FFLAGSMASK; + switch (ffctrl) { + case NOTE_FFNOP: + break; + case 
NOTE_FFAND: + kn->kn_fflags &= fflags; + break; + case NOTE_FFOR: + kn->kn_fflags |= fflags; + break; + case NOTE_FFCOPY: + kn->kn_fflags = fflags; + break; + default: + /* ignored, should not happen */ + break; + } + + if (kev->flags & EV_ADD) { + kn->kn_data = kev->data; + kn->kn_udata = kev->udata; + } + + /* Allow clearing of an activated event. */ + if (kev->flags & EV_CLEAR) { + kn->kn_ptr.p_useract = 0; + kn->kn_data = 0; + } + + return (kn->kn_ptr.p_useract); +} + +int +filt_userprocess(struct knote *kn, struct kevent *kev) +{ + int active; + + active = kn->kn_ptr.p_useract; + if (active && kev != NULL) { + *kev = kn->kn_kevent; + if (kn->kn_flags & EV_CLEAR) { + kn->kn_ptr.p_useract = 0; + kn->kn_fflags = 0; + kn->kn_data = 0; + } + } + + return (active); +} /* * filt_seltrue: @@ -1221,6 +1322,17 @@ again: filter_detach(kn); knote_drop(kn, p); goto done; + } else if (kn->kn_fop == &user_filtops) { + /* Call f_modify to allow NOTE_TRIGGER without EV_ADD. */ + mtx_leave(&kq->kq_lock); + active = filter_modify(kev, kn); + mtx_enter(&kq->kq_lock); + if (active) + knote_activate(kn); + if (kev->flags & EV_ERROR) { + error = kev->data; + goto release; + } } if ((kev->flags & EV_DISABLE) && ((kn->kn_status & KN_DISABLED) == 0)) Index: sys/sys/event.h =================================================================== RCS file: src/sys/sys/event.h,v retrieving revision 1.67 diff -u -p -r1.67 event.h --- sys/sys/event.h 31 Mar 2022 01:41:22 -0000 1.67 +++ sys/sys/event.h 30 Apr 2022 13:33:13 -0000 @@ -40,8 +40,9 @@ #define EVFILT_TIMER (-7) /* timers */ #define EVFILT_DEVICE (-8) /* devices */ #define EVFILT_EXCEPT (-9) /* exceptional conditions */ +#define EVFILT_USER (-10) /* user event */ -#define EVFILT_SYSCOUNT 9 +#define EVFILT_SYSCOUNT 10 #define EV_SET(kevp, a, b, c, d, e, f) do { \ struct kevent *__kevp = (kevp); \ @@ -123,6 +124,19 @@ struct kevent { #define NOTE_CHANGE 0x00000001 /* device change event */ /* + * data/hint flags for EVFILT_USER, shared 
with userspace + */ +#define NOTE_FFNOP 0x00000000 /* ignore input fflags */ +#define NOTE_FFAND 0x40000000 /* AND fflags */ +#define NOTE_FFOR 0x80000000 /* OR fflags */ +#define NOTE_FFCOPY 0xc0000000 /* copy fflags */ + +#define NOTE_FFCTRLMASK 0xc0000000 /* masks for operations */ +#define NOTE_FFLAGSMASK 0x00ffffff + +#define NOTE_TRIGGER 0x01000000 /* trigger the event */ + +/* * This is currently visible to userland to work around broken * programs which pull in <sys/proc.h> or <sys/selinfo.h>. */ @@ -243,6 +257,7 @@ struct knote { union { struct file *p_fp; /* file data pointer */ struct process *p_process; /* process pointer */ + int p_useract; /* user event active */ } kn_ptr; const struct filterops *kn_fop; void *kn_hook; /* [o] */ Index: sys/sys/filedesc.h =================================================================== RCS file: src/sys/sys/filedesc.h,v retrieving revision 1.45 diff -u -p -r1.45 filedesc.h --- sys/sys/filedesc.h 4 Jul 2020 08:06:08 -0000 1.45 +++ sys/sys/filedesc.h 30 Apr 2022 13:33:13 -0000 @@ -87,6 +87,7 @@ struct filedesc { LIST_HEAD(, kqueue) fd_kqlist; /* [f] kqueues attached to this * filedesc */ int fd_flags; /* [a] flags on this filedesc */ + u_int fd_nuserevents; /* [a] number of kqueue user events */ }; /* Index: usr.bin/kdump/mksubr =================================================================== RCS file: src/usr.bin/kdump/mksubr,v retrieving revision 1.38 diff -u -p -r1.38 mksubr --- usr.bin/kdump/mksubr 22 Feb 2022 17:35:01 -0000 1.38 +++ usr.bin/kdump/mksubr 30 Apr 2022 13:33:13 -0000 @@ -559,6 +559,27 @@ _EOF_ printf "\t\tif_print_or(fflags, %s, or);\n", $i }' cat <<_EOF_ break; + case EVFILT_USER: + if (fflags & NOTE_FFCTRLMASK) { + switch (fflags & NOTE_FFCTRLMASK) { + case NOTE_FFAND: + printf("NOTE_FFAND"); + break; + case NOTE_FFOR: + printf("NOTE_FFOR"); + break; + case NOTE_FFCOPY: + printf("NOTE_FFCOPY"); + break; + } + or = 1; + } + if_print_or(fflags, NOTE_TRIGGER, or); + if (fflags & NOTE_FFLAGSMASK) { 
+ printf("%s%#x", or ? "|" : "", + fflags & NOTE_FFLAGSMASK); + } + break; } printf(">"); }