Module Name: src Committed By: christos Date: Fri Jul 28 18:19:01 UTC 2023
Modified Files: src/distrib/sets/lists/comp: mi src/distrib/sets/lists/debug: mi src/distrib/sets/lists/tests: mi src/lib/libc/compat/sys: compat_kevent.c src/lib/libc/sys: Makefile.inc kqueue.2 src/lib/libpthread: pthread_cancelstub.c src/lib/librumpclient: rumpclient.c src/lib/librumphijack: hijack.c src/sys/compat/common: compat_100_mod.c compat_mod.h files.common kern_select_50.c src/sys/compat/linux/arch/amd64: syscalls.master src/sys/compat/linux/common: linux_misc.c linux_misc.h src/sys/compat/sys: event.h src/sys/kern: files.kern kern_event.c makesyscalls.sh syscalls.conf syscalls.master src/sys/rump: Makefile.rump rump.sysmap src/sys/sys: Makefile event.h syscall.h src/tests/kernel: Makefile Added Files: src/lib/libc/sys: epoll.2 epoll.c src/sys/compat/common: kern_event_100.c src/sys/kern: sys_epoll.c src/sys/sys: epoll.h src/tests/kernel: t_epoll.c Log Message: Add epoll(2) from Theodore Preduta as part of GSoC 2023 To generate a diff of this commit: cvs rdiff -u -r1.2437 -r1.2438 src/distrib/sets/lists/comp/mi cvs rdiff -u -r1.406 -r1.407 src/distrib/sets/lists/debug/mi cvs rdiff -u -r1.1277 -r1.1278 src/distrib/sets/lists/tests/mi cvs rdiff -u -r1.2 -r1.3 src/lib/libc/compat/sys/compat_kevent.c cvs rdiff -u -r1.251 -r1.252 src/lib/libc/sys/Makefile.inc cvs rdiff -u -r0 -r1.1 src/lib/libc/sys/epoll.2 src/lib/libc/sys/epoll.c cvs rdiff -u -r1.58 -r1.59 src/lib/libc/sys/kqueue.2 cvs rdiff -u -r1.43 -r1.44 src/lib/libpthread/pthread_cancelstub.c cvs rdiff -u -r1.69 -r1.70 src/lib/librumpclient/rumpclient.c cvs rdiff -u -r1.136 -r1.137 src/lib/librumphijack/hijack.c cvs rdiff -u -r1.1 -r1.2 src/sys/compat/common/compat_100_mod.c cvs rdiff -u -r1.7 -r1.8 src/sys/compat/common/compat_mod.h cvs rdiff -u -r1.8 -r1.9 src/sys/compat/common/files.common cvs rdiff -u -r0 -r1.1 src/sys/compat/common/kern_event_100.c cvs rdiff -u -r1.3 -r1.4 src/sys/compat/common/kern_select_50.c cvs rdiff -u -r1.68 -r1.69 src/sys/compat/linux/arch/amd64/syscalls.master cvs rdiff -u 
-r1.257 -r1.258 src/sys/compat/linux/common/linux_misc.c cvs rdiff -u -r1.26 -r1.27 src/sys/compat/linux/common/linux_misc.h cvs rdiff -u -r1.2 -r1.3 src/sys/compat/sys/event.h cvs rdiff -u -r1.59 -r1.60 src/sys/kern/files.kern cvs rdiff -u -r1.148 -r1.149 src/sys/kern/kern_event.c cvs rdiff -u -r1.186 -r1.187 src/sys/kern/makesyscalls.sh cvs rdiff -u -r0 -r1.1 src/sys/kern/sys_epoll.c cvs rdiff -u -r1.31 -r1.32 src/sys/kern/syscalls.conf cvs rdiff -u -r1.310 -r1.311 src/sys/kern/syscalls.master cvs rdiff -u -r1.134 -r1.135 src/sys/rump/Makefile.rump cvs rdiff -u -r1.9 -r1.10 src/sys/rump/rump.sysmap cvs rdiff -u -r1.180 -r1.181 src/sys/sys/Makefile cvs rdiff -u -r0 -r1.1 src/sys/sys/epoll.h cvs rdiff -u -r1.54 -r1.55 src/sys/sys/event.h cvs rdiff -u -r1.322 -r1.323 src/sys/sys/syscall.h cvs rdiff -u -r1.72 -r1.73 src/tests/kernel/Makefile cvs rdiff -u -r0 -r1.1 src/tests/kernel/t_epoll.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/distrib/sets/lists/comp/mi diff -u src/distrib/sets/lists/comp/mi:1.2437 src/distrib/sets/lists/comp/mi:1.2438 --- src/distrib/sets/lists/comp/mi:1.2437 Sun Jul 9 22:31:54 2023 +++ src/distrib/sets/lists/comp/mi Fri Jul 28 14:18:59 2023 @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.2437 2023/07/10 02:31:54 christos Exp $ +# $NetBSD: mi,v 1.2438 2023/07/28 18:18:59 christos Exp $ # # Note: don't delete entries from here - mark them as "obsolete" instead. ./etc/mtree/set.comp comp-sys-root @@ -3328,6 +3328,7 @@ ./usr/include/sys/elfdefinitions.h comp-c-include ./usr/include/sys/endian.h comp-c-include ./usr/include/sys/envsys.h comp-c-include +./usr/include/sys/epoll.h comp-c-include ./usr/include/sys/errno.h comp-c-include ./usr/include/sys/evcnt.h comp-c-include ./usr/include/sys/event.h comp-c-include @@ -13211,6 +13212,16 @@ ./usr/share/man/html2/dup.html comp-c-htmlman html ./usr/share/man/html2/dup2.html comp-c-htmlman html ./usr/share/man/html2/dup3.html comp-c-htmlman html +./usr/share/man/html2/epoll.html comp-c-htmlman html +./usr/share/man/html2/epoll_create.html comp-c-htmlman html +./usr/share/man/html2/epoll_create1.html comp-c-htmlman html +./usr/share/man/html2/epoll_ctl.html comp-c-htmlman html +./usr/share/man/html2/epoll_data.html comp-c-htmlman html +./usr/share/man/html2/epoll_data_t.html comp-c-htmlman html +./usr/share/man/html2/epoll_event.html comp-c-htmlman html +./usr/share/man/html2/epoll_pwait.html comp-c-htmlman html +./usr/share/man/html2/epoll_pwait2.html comp-c-htmlman html +./usr/share/man/html2/epoll_wait.html comp-c-htmlman html ./usr/share/man/html2/errno.html comp-c-htmlman html ./usr/share/man/html2/eventfd.html comp-c-htmlman html ./usr/share/man/html2/eventfd_read.html comp-c-htmlman html @@ -21524,6 +21535,16 @@ ./usr/share/man/man2/dup.2 comp-c-man .man ./usr/share/man/man2/dup2.2 comp-c-man .man ./usr/share/man/man2/dup3.2 comp-c-man .man +./usr/share/man/man2/epoll.2 comp-c-man .man 
+./usr/share/man/man2/epoll_create.2 comp-c-man .man +./usr/share/man/man2/epoll_create1.2 comp-c-man .man +./usr/share/man/man2/epoll_ctl.2 comp-c-man .man +./usr/share/man/man2/epoll_data.2 comp-c-man .man +./usr/share/man/man2/epoll_data_t.2 comp-c-man .man +./usr/share/man/man2/epoll_event.2 comp-c-man .man +./usr/share/man/man2/epoll_pwait.2 comp-c-man .man +./usr/share/man/man2/epoll_pwait2.2 comp-c-man .man +./usr/share/man/man2/epoll_wait.2 comp-c-man .man ./usr/share/man/man2/errno.2 comp-c-man .man ./usr/share/man/man2/eventfd.2 comp-c-man .man ./usr/share/man/man2/eventfd_read.2 comp-c-man .man Index: src/distrib/sets/lists/debug/mi diff -u src/distrib/sets/lists/debug/mi:1.406 src/distrib/sets/lists/debug/mi:1.407 --- src/distrib/sets/lists/debug/mi:1.406 Wed Jul 5 18:42:46 2023 +++ src/distrib/sets/lists/debug/mi Fri Jul 28 14:18:59 2023 @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.406 2023/07/05 22:42:46 riastradh Exp $ +# $NetBSD: mi,v 1.407 2023/07/28 18:18:59 christos Exp $ ./etc/mtree/set.debug comp-sys-root ./usr/lib comp-sys-usr compatdir ./usr/lib/i18n/libBIG5_g.a comp-c-debuglib debuglib,compatfile @@ -1790,6 +1790,7 @@ ./usr/libdata/debug/usr/tests/kernel/posix_spawn/t_fileactions.debug tests-obsolete obsolete,compattestfile ./usr/libdata/debug/usr/tests/kernel/posix_spawn/t_spawn.debug tests-obsolete obsolete,compattestfile ./usr/libdata/debug/usr/tests/kernel/posix_spawn/t_spawnattr.debug tests-obsolete obsolete,compattestfile +./usr/libdata/debug/usr/tests/kernel/t_epoll.debug tests-kernel-tests debug,atf ./usr/libdata/debug/usr/tests/kernel/t_extattrctl.debug tests-kernel-tests debug,atf,rump ./usr/libdata/debug/usr/tests/kernel/t_extent.debug tests-kernel-tests debug,atf,compattestfile ./usr/libdata/debug/usr/tests/kernel/t_fcntl.debug tests-kernel-tests debug,atf Index: src/distrib/sets/lists/tests/mi diff -u src/distrib/sets/lists/tests/mi:1.1277 src/distrib/sets/lists/tests/mi:1.1278 --- src/distrib/sets/lists/tests/mi:1.1277 Sat Jul 15 
08:24:57 2023 +++ src/distrib/sets/lists/tests/mi Fri Jul 28 14:19:00 2023 @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.1277 2023/07/15 12:24:57 rillig Exp $ +# $NetBSD: mi,v 1.1278 2023/07/28 18:19:00 christos Exp $ # # Note: don't delete entries from here - mark them as "obsolete" instead. # @@ -2296,6 +2296,7 @@ ./usr/tests/kernel/posix_spawn/t_fileactions tests-obsolete obsolete ./usr/tests/kernel/posix_spawn/t_spawn tests-obsolete obsolete ./usr/tests/kernel/posix_spawn/t_spawnattr tests-obsolete obsolete +./usr/tests/kernel/t_epoll tests-kernel-tests atf ./usr/tests/kernel/t_extattrctl tests-kernel-tests atf,rump ./usr/tests/kernel/t_extent tests-kernel-tests compattestfile,atf ./usr/tests/kernel/t_fcntl tests-kernel-tests atf Index: src/lib/libc/compat/sys/compat_kevent.c diff -u src/lib/libc/compat/sys/compat_kevent.c:1.2 src/lib/libc/compat/sys/compat_kevent.c:1.3 --- src/lib/libc/compat/sys/compat_kevent.c:1.2 Sat Jan 10 21:46:26 2009 +++ src/lib/libc/compat/sys/compat_kevent.c Fri Jul 28 14:19:00 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: compat_kevent.c,v 1.2 2009/01/11 02:46:26 christos Exp $ */ +/* $NetBSD: compat_kevent.c,v 1.3 2023/07/28 18:19:00 christos Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. 
@@ -37,7 +37,7 @@ */ #include <sys/cdefs.h> #if defined(LIBC_SCCS) && !defined(lint) -__RCSID("$NetBSD: compat_kevent.c,v 1.2 2009/01/11 02:46:26 christos Exp $"); +__RCSID("$NetBSD: compat_kevent.c,v 1.3 2023/07/28 18:19:00 christos Exp $"); #endif /* LIBC_SCCS and not lint */ #include "namespace.h" @@ -46,13 +46,16 @@ __RCSID("$NetBSD: compat_kevent.c,v 1.2 #include <compat/sys/time.h> #include <sys/event.h> #include <compat/sys/event.h> +#include <stdlib.h> __warn_references(kevent, "warning: reference to compatibility kevent(); include <sys/event.h> to generate correct reference") +__warn_references(kevent, + "warning: reference to compatibility __kevent50(); use kevent()") int -kevent(int kq, const struct kevent *changelist, size_t nchanges, - struct kevent *eventlist, size_t nevents, const struct timespec50 *ts50) +kevent(int kq, const struct kevent100 *changelist, size_t nchanges, + struct kevent100 *eventlist, size_t nevents, const struct timespec50 *ts50) { struct timespec ts, *tsp; @@ -60,5 +63,41 @@ kevent(int kq, const struct kevent *chan timespec50_to_timespec(ts50, tsp = &ts); else tsp = NULL; - return __kevent50(kq, changelist, nchanges, eventlist, nevents, tsp); + return __kevent50(kq, changelist, nchanges, eventlist, nevents, tsp); +} + +int +__kevent50(int kq, const struct kevent100 *changelist100, size_t nchanges, + struct kevent100 *eventlist100, size_t nevents, const struct timespec *tsp) +{ + int retval; + struct kevent *changelist; + struct kevent *eventlist; + + changelist = malloc(sizeof(*changelist) * nchanges); + if (changelist == NULL) { + return -1; + } + + eventlist = malloc(sizeof(*eventlist) * nevents); + if (eventlist == NULL) { + retval = -1; + goto leave0; + } + + for (size_t i = 0; i < nchanges; i++) + kevent100_to_kevent(changelist100 + i, changelist + i); + + retval = __kevent100(kq, changelist, nchanges, eventlist, nevents, tsp); + if (retval == -1) + goto leave1; + + for (int i = 0; i < retval; i++) + 
kevent_to_kevent100(eventlist + i, eventlist100 + i); + +leave1: + free(eventlist); +leave0: + free(changelist); + return retval; } Index: src/lib/libc/sys/Makefile.inc diff -u src/lib/libc/sys/Makefile.inc:1.251 src/lib/libc/sys/Makefile.inc:1.252 --- src/lib/libc/sys/Makefile.inc:1.251 Sun Jul 9 22:31:54 2023 +++ src/lib/libc/sys/Makefile.inc Fri Jul 28 14:19:00 2023 @@ -1,4 +1,4 @@ -# $NetBSD: Makefile.inc,v 1.251 2023/07/10 02:31:54 christos Exp $ +# $NetBSD: Makefile.inc,v 1.252 2023/07/28 18:19:00 christos Exp $ # @(#)Makefile.inc 8.3 (Berkeley) 10/24/94 # sys sources @@ -7,10 +7,10 @@ # other sources shared with the kernel, used in syscalls SRCS+= cpuset.c # glue to offer userland wrappers for some syscalls -SRCS+= accept4.c clock_getcpuclockid.c eventfd_read.c eventfd_write.c \ - posix_fadvise.c posix_madvise.c ppoll.c sched.c sigqueue.c \ - sigtimedwait.c sigwait.c sigwaitinfo.c statvfs.c swapon.c semctl.c \ - vadvise.c +SRCS+= accept4.c clock_getcpuclockid.c epoll.c eventfd_read.c \ + eventfd_write.c posix_fadvise.c posix_madvise.c ppoll.c sched.c \ + sigqueue.c sigtimedwait.c sigwait.c sigwaitinfo.c statvfs.c swapon.c \ + semctl.c vadvise.c .if ${RUMPRUN} != "yes" # modules with non-default implementations on at least one architecture: @@ -103,6 +103,7 @@ ASM=\ clock_getcpuclockid2.S \ __clock_getres50.S __clock_gettime50.S \ dup.S dup2.S dup3.S \ + epoll_create1.S epoll_ctl.S epoll_pwait2.S \ eventfd.S \ extattrctl.S \ extattr_delete_fd.S extattr_delete_file.S \ @@ -180,7 +181,7 @@ ASM_MD= _lwp_getprivate.S mremap.S WEAKASM= accept.S __aio_suspend50.S clock_nanosleep.S close.S connect.S \ execve.S \ fcntl.S fdatasync.S fsync.S \ - fsync_range.S __kevent50.S \ + fsync_range.S __kevent100.S \ kill.S mq_receive.S mq_send.S __mq_timedreceive50.S __mq_timedsend50.S \ msgrcv.S msgsnd.S __msync13.S __nanosleep50.S open.S openat.S \ paccept.S poll.S \ @@ -260,7 +261,7 @@ LintSysPseudoNoerr.c: ${LIBCDIR}/sys/mak MAN+= accept.2 access.2 acct.2 adjtime.2 bind.2 
brk.2 chdir.2 \ chflags.2 chmod.2 chown.2 chroot.2 clock_getcpuclockid2.2 \ clock_settime.2 clone.2 close.2 \ - connect.2 dup.2 eventfd.2 execve.2 _exit.2 extattr_get_file.2 \ + connect.2 dup.2 epoll.2 eventfd.2 execve.2 _exit.2 extattr_get_file.2 \ fcntl.2 fdatasync.2 fdiscard.2 fhopen.2 \ flock.2 fork.2 fsync.2 getcontext.2 getdents.2 \ getfh.2 getvfsstat.2 getgid.2 getgroups.2 \ @@ -308,6 +309,15 @@ MLINKS+=chown.2 fchown.2 chown.2 lchown. MLINKS+=chroot.2 fchroot.2 MLINKS+=clock_settime.2 clock_gettime.2 MLINKS+=clock_settime.2 clock_getres.2 +MLINKS+=epoll.2 epoll_event.2 \ + epoll.2 epoll_data.2 \ + epoll.2 epoll_data_t.2 \ + epoll.2 epoll_create.2 \ + epoll.2 epoll_create1.2 \ + epoll.2 epoll_ctl.2 \ + epoll.2 epoll_wait.2 \ + epoll.2 epoll_pwait.2 \ + epoll.2 epoll_pwait2.2 MLINKS+=eventfd.2 eventfd_read.2 \ eventfd.2 eventfd_write.2 MLINKS+=extattr_get_file.2 extattr_set_file.2 \ Index: src/lib/libc/sys/kqueue.2 diff -u src/lib/libc/sys/kqueue.2:1.58 src/lib/libc/sys/kqueue.2:1.59 --- src/lib/libc/sys/kqueue.2:1.58 Sun Feb 13 11:51:56 2022 +++ src/lib/libc/sys/kqueue.2 Fri Jul 28 14:19:00 2023 @@ -1,4 +1,4 @@ -.\" $NetBSD: kqueue.2,v 1.58 2022/02/13 16:51:56 pgoyette Exp $ +.\" $NetBSD: kqueue.2,v 1.59 2023/07/28 18:19:00 christos Exp $ .\" .\" Copyright (c) 2000 Jonathan Lemon .\" All rights reserved. @@ -32,7 +32,7 @@ .\" .\" $FreeBSD: src/lib/libc/sys/kqueue.2,v 1.22 2001/06/27 19:55:57 dd Exp $ .\" -.Dd February 13, 2022 +.Dd June 19, 2023 .Dt KQUEUE 2 .Os .Sh NAME @@ -192,6 +192,7 @@ struct kevent { uint32_t fflags; /* filter flag value */ int64_t data; /* filter data value */ void *udata; /* opaque user data identifier */ + uint64_t ext[4]; /* extensions */ }; .Ed .Pp @@ -215,6 +216,20 @@ Filter-specific flags. Filter-specific data value. .It Fa udata Opaque user-defined value passed through the kernel unchanged. +.It Fa ext +Extended data passed to and from kernel. +The +.Fa ext[0] +and +.Fa ext[1] +members use is defined by the filter. 
+If the filter does not use them, the members are copied unchanged. +The +.Fa ext[2] +and +.Fa ext[3] +members are always passed through the kernel as-is, +making additional context available to application. .El .Pp The Index: src/lib/libpthread/pthread_cancelstub.c diff -u src/lib/libpthread/pthread_cancelstub.c:1.43 src/lib/libpthread/pthread_cancelstub.c:1.44 --- src/lib/libpthread/pthread_cancelstub.c:1.43 Tue Apr 19 16:32:17 2022 +++ src/lib/libpthread/pthread_cancelstub.c Fri Jul 28 14:19:00 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: pthread_cancelstub.c,v 1.43 2022/04/19 20:32:17 rillig Exp $ */ +/* $NetBSD: pthread_cancelstub.c,v 1.44 2023/07/28 18:19:00 christos Exp $ */ /*- * Copyright (c) 2002, 2007 The NetBSD Foundation, Inc. @@ -33,7 +33,7 @@ #undef _FORTIFY_SOURCE #include <sys/cdefs.h> -__RCSID("$NetBSD: pthread_cancelstub.c,v 1.43 2022/04/19 20:32:17 rillig Exp $"); +__RCSID("$NetBSD: pthread_cancelstub.c,v 1.44 2023/07/28 18:19:00 christos Exp $"); /* Need to use libc-private names for atomic operations. 
*/ #include "../../common/lib/libc/atomic/atomic_op_namespace.h" @@ -101,7 +101,7 @@ int _sys_fcntl(int, int, ...); int _sys_fdatasync(int); int _sys_fsync(int); int _sys_fsync_range(int, int, off_t, off_t); -int _sys___kevent50(int, const struct kevent *, size_t, struct kevent *, +int _sys___kevent100(int, const struct kevent *, size_t, struct kevent *, size_t, const struct timespec *); int _sys_mq_send(mqd_t, const char *, size_t, unsigned); ssize_t _sys_mq_receive(mqd_t, char *, size_t, unsigned *); @@ -180,7 +180,7 @@ __aio_suspend50(const struct aiocb * con } int -__kevent50(int fd, const struct kevent *ev, size_t nev, struct kevent *rev, +__kevent100(int fd, const struct kevent *ev, size_t nev, struct kevent *rev, size_t nrev, const struct timespec *ts) { int retval; @@ -188,7 +188,7 @@ __kevent50(int fd, const struct kevent * self = pthread__self(); TESTCANCEL(self); - retval = _sys___kevent50(fd, ev, nev, rev, nrev, ts); + retval = _sys___kevent100(fd, ev, nev, rev, nrev, ts); TESTCANCEL(self); return retval; Index: src/lib/librumpclient/rumpclient.c diff -u src/lib/librumpclient/rumpclient.c:1.69 src/lib/librumpclient/rumpclient.c:1.70 --- src/lib/librumpclient/rumpclient.c:1.69 Thu Sep 16 18:19:10 2021 +++ src/lib/librumpclient/rumpclient.c Fri Jul 28 14:19:00 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: rumpclient.c,v 1.69 2021/09/16 22:19:10 andvar Exp $ */ +/* $NetBSD: rumpclient.c,v 1.70 2023/07/28 18:19:00 christos Exp $ */ /* * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved. 
@@ -50,7 +50,7 @@ #define USE_SIGNALFD #endif -__RCSID("$NetBSD: rumpclient.c,v 1.69 2021/09/16 22:19:10 andvar Exp $"); +__RCSID("$NetBSD: rumpclient.c,v 1.70 2023/07/28 18:19:00 christos Exp $"); #include <sys/param.h> #include <sys/mman.h> @@ -926,8 +926,10 @@ rumpclient_init(void) #ifdef __NetBSD__ #if !__NetBSD_Prereq__(5,99,7) FINDSYM(kevent) -#else +#elif !__NetBSD_Prereq__(10,99,4) FINDSYM2(kevent,_sys___kevent50) +#else + FINDSYM2(kevent,_sys___kevent100) #endif #else FINDSYM(kevent) Index: src/lib/librumphijack/hijack.c diff -u src/lib/librumphijack/hijack.c:1.136 src/lib/librumphijack/hijack.c:1.137 --- src/lib/librumphijack/hijack.c:1.136 Sat Apr 16 14:15:20 2022 +++ src/lib/librumphijack/hijack.c Fri Jul 28 14:19:00 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: hijack.c,v 1.136 2022/04/16 18:15:20 andvar Exp $ */ +/* $NetBSD: hijack.c,v 1.137 2023/07/28 18:19:00 christos Exp $ */ /*- * Copyright (c) 2011 Antti Kantee. All Rights Reserved. @@ -34,7 +34,7 @@ #include <rump/rumpuser_port.h> #if !defined(lint) -__RCSID("$NetBSD: hijack.c,v 1.136 2022/04/16 18:15:20 andvar Exp $"); +__RCSID("$NetBSD: hijack.c,v 1.137 2023/07/28 18:19:00 christos Exp $"); #endif #include <sys/param.h> @@ -190,7 +190,6 @@ enum dualcall { #define REALPSELECT pselect #define REALSELECT select #define REALPOLLTS pollts -#define REALKEVENT kevent #define REALSTAT __stat30 #define REALLSTAT __lstat30 #define REALFSTAT __fstat30 @@ -203,7 +202,6 @@ enum dualcall { #define REALPSELECT _sys___pselect50 #define REALSELECT _sys___select50 #define REALPOLLTS _sys___pollts50 -#define REALKEVENT _sys___kevent50 #define REALSTAT __stat50 #define REALLSTAT __lstat50 #define REALFSTAT __fstat50 @@ -214,6 +212,14 @@ enum dualcall { #define REALFHSTAT __fhstat50 #endif /* < 5.99.7 */ +#if !__NetBSD_Prereq__(5,99,7) +#define REALKEVENT kevent +#elif !__NetBSD_Prereq__(10,99,4) +#define REALKEVENT _sys___kevent50 +#else +#define REALKEVENT _sys___kevent100 +#endif + #define REALREAD _sys_read #define 
REALPREAD _sys_pread #define REALPWRITE _sys_pwrite Index: src/sys/compat/common/compat_100_mod.c diff -u src/sys/compat/common/compat_100_mod.c:1.1 src/sys/compat/common/compat_100_mod.c:1.2 --- src/sys/compat/common/compat_100_mod.c:1.1 Mon Dec 19 18:19:51 2022 +++ src/sys/compat/common/compat_100_mod.c Fri Jul 28 14:19:00 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: compat_100_mod.c,v 1.1 2022/12/19 23:19:51 pgoyette Exp $ */ +/* $NetBSD: compat_100_mod.c,v 1.2 2023/07/28 18:19:00 christos Exp $ */ /*- * Copyright (c) 2019 The NetBSD Foundation, Inc. @@ -38,7 +38,7 @@ #endif #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: compat_100_mod.c,v 1.1 2022/12/19 23:19:51 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: compat_100_mod.c,v 1.2 2023/07/28 18:19:00 christos Exp $"); #include <sys/systm.h> #include <sys/module.h> @@ -50,14 +50,14 @@ int compat_100_init(void) { - return 0; + return kern_event_100_init(); } int compat_100_fini(void) { - return 0; + return kern_event_100_fini(); } MODULE(MODULE_CLASS_EXEC, compat_100, NULL); Index: src/sys/compat/common/compat_mod.h diff -u src/sys/compat/common/compat_mod.h:1.7 src/sys/compat/common/compat_mod.h:1.8 --- src/sys/compat/common/compat_mod.h:1.7 Mon Dec 19 18:19:51 2022 +++ src/sys/compat/common/compat_mod.h Fri Jul 28 14:19:00 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: compat_mod.h,v 1.7 2022/12/19 23:19:51 pgoyette Exp $ */ +/* $NetBSD: compat_mod.h,v 1.8 2023/07/28 18:19:00 christos Exp $ */ /*- * Copyright (c) 2013, 2019 The NetBSD Foundation, Inc. 
@@ -35,6 +35,8 @@ #ifdef COMPAT_100 int compat_100_init(void); int compat_100_fini(void); +int kern_event_100_init(void); +int kern_event_100_fini(void); #endif #ifdef COMPAT_90 Index: src/sys/compat/common/files.common diff -u src/sys/compat/common/files.common:1.8 src/sys/compat/common/files.common:1.9 --- src/sys/compat/common/files.common:1.8 Mon Dec 19 18:19:51 2022 +++ src/sys/compat/common/files.common Fri Jul 28 14:19:00 2023 @@ -1,4 +1,4 @@ -# $NetBSD: files.common,v 1.8 2022/12/19 23:19:51 pgoyette Exp $ +# $NetBSD: files.common,v 1.9 2023/07/28 18:19:00 christos Exp $ # # Generic utility files, used by various compat options. @@ -112,6 +112,7 @@ file compat/common/vfs_syscalls_90.c co # Compatibility code for NetBSD 10.0 file compat/common/compat_100_mod.c compat_100 +file compat/common/kern_event_100.c compat_100 # # Sources for sysv ipc compatibility across the versions. Index: src/sys/compat/common/kern_select_50.c diff -u src/sys/compat/common/kern_select_50.c:1.3 src/sys/compat/common/kern_select_50.c:1.4 --- src/sys/compat/common/kern_select_50.c:1.3 Fri Sep 20 11:05:22 2019 +++ src/sys/compat/common/kern_select_50.c Fri Jul 28 14:19:00 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_select_50.c,v 1.3 2019/09/20 15:05:22 kamil Exp $ */ +/* $NetBSD: kern_select_50.c,v 1.4 2023/07/28 18:19:00 christos Exp $ */ /*- * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. @@ -29,7 +29,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_select_50.c,v 1.3 2019/09/20 15:05:22 kamil Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_select_50.c,v 1.4 2023/07/28 18:19:00 christos Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -44,6 +44,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_select_ #include <sys/syscallvar.h> #include <sys/syscallargs.h> +#include <compat/sys/event.h> #include <compat/sys/time.h> #include <compat/common/compat_mod.h> @@ -76,21 +77,22 @@ compat_50_sys_kevent(struct lwp *l, cons { /* { syscallarg(int) fd; - syscallarg(keventp_t) changelist; + syscallarg(struct kevent100 *) changelist; syscallarg(size_t) nchanges; - syscallarg(keventp_t) eventlist; + syscallarg(struct kevent100 *) eventlist; syscallarg(size_t) nevents; syscallarg(struct timespec50) timeout; } */ static const struct kevent_ops compat_50_kevent_ops = { .keo_private = NULL, .keo_fetch_timeout = compat_50_kevent_fetch_timeout, - .keo_fetch_changes = kevent_fetch_changes, - .keo_put_events = kevent_put_events, + .keo_fetch_changes = compat_100___kevent50_fetch_changes, + .keo_put_events = compat_100___kevent50_put_events, }; - return kevent1(retval, SCARG(uap, fd), SCARG(uap, changelist), - SCARG(uap, nchanges), SCARG(uap, eventlist), SCARG(uap, nevents), + return kevent1(retval, SCARG(uap, fd), + (const struct kevent *)(const void *)SCARG(uap, changelist), SCARG(uap, nchanges), + (struct kevent *)(void *)SCARG(uap, eventlist), SCARG(uap, nevents), (const struct timespec *)(const void *)SCARG(uap, timeout), &compat_50_kevent_ops); } Index: src/sys/compat/linux/arch/amd64/syscalls.master diff -u src/sys/compat/linux/arch/amd64/syscalls.master:1.68 src/sys/compat/linux/arch/amd64/syscalls.master:1.69 --- src/sys/compat/linux/arch/amd64/syscalls.master:1.68 Sun Jul 9 22:31:55 2023 +++ src/sys/compat/linux/arch/amd64/syscalls.master Fri Jul 28 14:19:00 2023 @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.68 2023/07/10 02:31:55 christos Exp $ + $NetBSD: 
syscalls.master,v 1.69 2023/07/28 18:19:00 christos Exp $ ; @(#)syscalls.master 8.1 (Berkeley) 7/19/93 @@ -51,6 +51,7 @@ #include <compat/sys/time.h> #include <compat/linux/common/linux_types.h> +#include <compat/linux/common/linux_misc.h> #include <compat/linux/common/linux_mmap.h> #include <compat/linux/common/linux_ipc.h> #include <compat/linux/common/linux_msg.h> @@ -412,7 +413,7 @@ 210 UNIMPL io_cancel 211 UNIMPL get_thread_area 212 UNIMPL lookup_dcookie -213 UNIMPL epoll_create +213 STD { int|linux_sys||epoll_create(int size); } 214 UNIMPL epoll_ctl_old 215 UNIMPL epoll_wait_old 216 UNIMPL remap_file_pages @@ -442,8 +443,11 @@ int flags, struct linux_timespec *rqtp, \ struct linux_timespec *rmtp); } 231 STD { int|linux_sys||exit_group(int error_code); } -232 UNIMPL epoll_wait -233 UNIMPL epoll_ctl +232 STD { int|linux_sys||epoll_wait(int epfd, \ + struct linux_epoll_event *events, int maxevents, \ + int timeout); } +233 STD { int|linux_sys||epoll_ctl(int epfd, int op, int fd, \ + struct linux_epoll_event *event); } 234 STD { int|linux_sys||tgkill(int tgid, int tid, int sig); } 235 NOARGS { int|compat_50_sys||utimes(const char *path, \ const struct timeval50 *tptr); } @@ -517,7 +521,9 @@ 279 UNIMPL move_pages 280 STD { int|linux_sys||utimensat(int fd, const char *path, \ struct linux_timespec *times, int flag); } -281 UNIMPL epoll_pwait +281 STD { int|linux_sys||epoll_pwait(int epfd, \ + struct linux_epoll_event *events, int maxevents, \ + int timeout, const linux_sigset_t *sigmask); } 282 UNIMPL signalfd 283 STD { int|linux_sys||timerfd_create(clockid_t clock_id, \ int flags); } @@ -535,7 +541,7 @@ 289 UNIMPL signalfd4 290 STD { int|linux_sys||eventfd2(unsigned int initval, \ int flags); } -291 UNIMPL epoll_create1 +291 STD { int|linux_sys||epoll_create1(int flags); } 292 STD { int|linux_sys||dup3(int from, int to, int flags); } 293 STD { int|linux_sys||pipe2(int *pfds, int flags); } 294 UNIMPL inotify_init1 @@ -696,7 +702,10 @@ 438 UNIMPL pidfd_getfd 439 
UNIMPL faccessat2 440 UNIMPL process_madvise -441 UNIMPL epoll_pwait2 +441 STD { int|linux_sys||epoll_pwait2(int epfd, \ + struct linux_epoll_event *events, int maxevents, \ + const struct linux_timespec *timeout, \ + const linux_sigset_t *sigmask); } 442 UNIMPL mount_setattr 443 UNIMPL quotactl_fd 444 UNIMPL landlock_create_ruleset Index: src/sys/compat/linux/common/linux_misc.c diff -u src/sys/compat/linux/common/linux_misc.c:1.257 src/sys/compat/linux/common/linux_misc.c:1.258 --- src/sys/compat/linux/common/linux_misc.c:1.257 Sun Jul 9 22:31:55 2023 +++ src/sys/compat/linux/common/linux_misc.c Fri Jul 28 14:19:01 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: linux_misc.c,v 1.257 2023/07/10 02:31:55 christos Exp $ */ +/* $NetBSD: linux_misc.c,v 1.258 2023/07/28 18:19:01 christos Exp $ */ /*- * Copyright (c) 1995, 1998, 1999, 2008 The NetBSD Foundation, Inc. @@ -57,13 +57,14 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: linux_misc.c,v 1.257 2023/07/10 02:31:55 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: linux_misc.c,v 1.258 2023/07/28 18:19:01 christos Exp $"); #include <sys/param.h> #include <sys/systm.h> #include <sys/namei.h> #include <sys/proc.h> #include <sys/dirent.h> +#include <sys/epoll.h> #include <sys/eventfd.h> #include <sys/file.h> #include <sys/stat.h> @@ -1682,6 +1683,234 @@ linux_sys_eventfd2(struct lwp *l, const retval); } +/* + * epoll_create(2). Check size and call sys_epoll_create1. + */ +int +linux_sys_epoll_create(struct lwp *l, + const struct linux_sys_epoll_create_args *uap, register_t *retval) +{ + /* { + syscallarg(int) size; + } */ + struct sys_epoll_create1_args ca; + + /* + * SCARG(uap, size) is unused. Linux just tests it and then + * forgets it as well. + */ + if (SCARG(uap, size) <= 0) + return EINVAL; + + SCARG(&ca, flags) = 0; + return sys_epoll_create1(l, &ca, retval); +} + +/* + * epoll_create1(2). Translate the flags and call sys_epoll_create1. 
+ */ +int +linux_sys_epoll_create1(struct lwp *l, + const struct linux_sys_epoll_create1_args *uap, register_t *retval) +{ + /* { + syscallarg(int) flags; + } */ + struct sys_epoll_create1_args ca; + + if ((SCARG(uap, flags) & ~(LINUX_O_CLOEXEC)) != 0) + return EINVAL; + + SCARG(&ca, flags) = 0; + if ((SCARG(uap, flags) & LINUX_O_CLOEXEC) != 0) + SCARG(&ca, flags) |= O_CLOEXEC; + + return sys_epoll_create1(l, &ca, retval); +} + +/* + * epoll_ctl(2). Copyin event and translate it if necessary and then + * call epoll_ctl_common(). + */ +int +linux_sys_epoll_ctl(struct lwp *l, const struct linux_sys_epoll_ctl_args *uap, + register_t *retval) +{ + /* { + syscallarg(int) epfd; + syscallarg(int) op; + syscallarg(int) fd; + syscallarg(struct linux_epoll_event *) event; + } */ + struct linux_epoll_event lee; + struct epoll_event ee; + struct epoll_event *eep; + int error; + + if (SCARG(uap, op) != EPOLL_CTL_DEL) { + error = copyin(SCARG(uap, event), &lee, sizeof(lee)); + if (error != 0) + return error; + + /* + * On some architectures, struct linux_epoll_event and + * struct epoll_event are packed differently... but otherwise + * the contents are the same. + */ + ee.events = lee.events; + ee.data = lee.data; + + eep = &ee; + } else + eep = NULL; + + return epoll_ctl_common(l, retval, SCARG(uap, epfd), SCARG(uap, op), + SCARG(uap, fd), eep); +} + +/* + * epoll_wait(2). Call sys_epoll_pwait(). 
+ */ +int +linux_sys_epoll_wait(struct lwp *l, + const struct linux_sys_epoll_wait_args *uap, register_t *retval) +{ + /* { + syscallarg(int) epfd; + syscallarg(struct linux_epoll_event *) events; + syscallarg(int) maxevents; + syscallarg(int) timeout; + } */ + struct linux_sys_epoll_pwait_args ea; + + SCARG(&ea, epfd) = SCARG(uap, epfd); + SCARG(&ea, events) = SCARG(uap, events); + SCARG(&ea, maxevents) = SCARG(uap, maxevents); + SCARG(&ea, timeout) = SCARG(uap, timeout); + SCARG(&ea, sigmask) = NULL; + + return linux_sys_epoll_pwait(l, &ea, retval); +} + +/* + * Main body of epoll_pwait2(2). Translate timeout and sigmask and + * call epoll_wait_common. + */ +static int +linux_epoll_pwait2_common(struct lwp *l, register_t *retval, int epfd, + struct linux_epoll_event *events, int maxevents, + struct linux_timespec *timeout, const linux_sigset_t *sigmask) +{ + struct timespec ts, *tsp; + linux_sigset_t lss; + sigset_t ss, *ssp; + struct epoll_event *eep; + struct linux_epoll_event *leep; + int i, error; + + if (maxevents <= 0 || maxevents > EPOLL_MAX_EVENTS) + return EINVAL; + + if (timeout != NULL) { + linux_to_native_timespec(&ts, timeout); + tsp = &ts; + } else + tsp = NULL; + + if (sigmask != NULL) { + error = copyin(sigmask, &lss, sizeof(lss)); + if (error != 0) + return error; + + linux_to_native_sigset(&ss, &lss); + ssp = &ss; + } else + ssp = NULL; + + eep = kmem_alloc(maxevents * sizeof(*eep), KM_SLEEP); + + error = epoll_wait_common(l, retval, epfd, eep, maxevents, tsp, + ssp); + if (error == 0 && *retval > 0) { + leep = kmem_alloc((*retval) * sizeof(*leep), KM_SLEEP); + + /* Translate the events (because of packing). */ + for (i = 0; i < *retval; i++) { + leep[i].events = eep[i].events; + leep[i].data = eep[i].data; + } + + error = copyout(leep, events, (*retval) * sizeof(*leep)); + kmem_free(leep, (*retval) * sizeof(*leep)); + } + + kmem_free(eep, maxevents * sizeof(*eep)); + return error; +} + +/* + * epoll_pwait(2). 
Translate timeout and call sys_epoll_pwait2. + */ +int +linux_sys_epoll_pwait(struct lwp *l, + const struct linux_sys_epoll_pwait_args *uap, register_t *retval) +{ + /* { + syscallarg(int) epfd; + syscallarg(struct linux_epoll_event *) events; + syscallarg(int) maxevents; + syscallarg(int) timeout; + syscallarg(linux_sigset_t *) sigmask; + } */ + struct linux_timespec lts, *ltsp; + const int timeout = SCARG(uap, timeout); + + if (timeout >= 0) { + /* Convert from milliseconds to timespec. */ + lts.tv_sec = timeout / 1000; + lts.tv_nsec = (timeout % 1000) * 1000000; + + ltsp = &lts; + } else + ltsp = NULL; + + return linux_epoll_pwait2_common(l, retval, SCARG(uap, epfd), + SCARG(uap, events), SCARG(uap, maxevents), ltsp, + SCARG(uap, sigmask)); +} + + +/* + * epoll_pwait2(2). Copyin timeout and call linux_epoll_pwait2_common(). + */ +int +linux_sys_epoll_pwait2(struct lwp *l, + const struct linux_sys_epoll_pwait2_args *uap, register_t *retval) +{ + /* { + syscallarg(int) epfd; + syscallarg(struct linux_epoll_event *) events; + syscallarg(int) maxevents; + syscallarg(struct linux_timespec *) timeout; + syscallarg(linux_sigset_t *) sigmask; + } */ + struct linux_timespec lts, *ltsp; + int error; + + if (SCARG(uap, timeout) != NULL) { + error = copyin(SCARG(uap, timeout), &lts, sizeof(lts)); + if (error != 0) + return error; + + ltsp = &lts; + } else + ltsp = NULL; + + return linux_epoll_pwait2_common(l, retval, SCARG(uap, epfd), + SCARG(uap, events), SCARG(uap, maxevents), ltsp, + SCARG(uap, sigmask)); +} + #define LINUX_MFD_CLOEXEC 0x0001U #define LINUX_MFD_ALLOW_SEALING 0x0002U #define LINUX_MFD_HUGETLB 0x0004U Index: src/sys/compat/linux/common/linux_misc.h diff -u src/sys/compat/linux/common/linux_misc.h:1.26 src/sys/compat/linux/common/linux_misc.h:1.27 --- src/sys/compat/linux/common/linux_misc.h:1.26 Sat May 2 21:06:56 2020 +++ src/sys/compat/linux/common/linux_misc.h Fri Jul 28 14:19:01 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: linux_misc.h,v 1.26 2020/05/03 01:06:56 
thorpej Exp $ */ +/* $NetBSD: linux_misc.h,v 1.27 2023/07/28 18:19:01 christos Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -141,6 +141,15 @@ extern const int linux_fstypes_cnt; */ #define linux_to_bsd_posix_fadv(advice) (advice) +struct linux_epoll_event { + uint32_t events; + uint64_t data; +} +#if defined(__amd64__) +__packed +#endif +; + #ifdef _KERNEL __BEGIN_DECLS int bsd_to_linux_wstat(int); Index: src/sys/compat/sys/event.h diff -u src/sys/compat/sys/event.h:1.2 src/sys/compat/sys/event.h:1.3 --- src/sys/compat/sys/event.h:1.2 Sat Jan 10 21:45:50 2009 +++ src/sys/compat/sys/event.h Fri Jul 28 14:19:01 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: event.h,v 1.2 2009/01/11 02:45:50 christos Exp $ */ +/* $NetBSD: event.h,v 1.3 2023/07/28 18:19:01 christos Exp $ */ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon <jle...@freebsd.org> @@ -34,11 +34,90 @@ #include <sys/cdefs.h> struct timespec; +#ifdef _KERNEL +#include <lib/libkern/libkern.h> +#else +#include <string.h> +#endif + +struct kevent100 { + uintptr_t ident; /* identifier for this event */ + uint32_t filter; /* filter for event */ + uint32_t flags; /* action flags for kqueue */ + uint32_t fflags; /* filter flag value */ + int64_t data; /* filter data value */ + void *udata; /* opaque user data identifier */ +}; + +static __inline void +kevent100_to_kevent(const struct kevent100 *kev100, struct kevent *kev) +{ + memset(kev, 0, sizeof(*kev)); + memcpy(kev, kev100, sizeof(*kev100)); +} + +static __inline void +kevent_to_kevent100(const struct kevent *kev, struct kevent100 *kev100) +{ + memcpy(kev100, kev, sizeof(*kev100)); +} + +#ifdef _KERNEL +static int +compat_100___kevent50_fetch_changes(void *ctx, const struct kevent *changelist, + struct kevent *changes, size_t index, int n) +{ + int error, i; + struct kevent100 *buf; + const size_t buf_size = sizeof(*buf) * n; + const struct kevent100 *changelist100 = (const struct kevent100 *)changelist; + + KASSERT(n >= 0); + + buf = kmem_alloc(buf_size, 
KM_SLEEP); + + error = copyin(changelist100 + index, buf, buf_size); + if (error != 0) + goto leave; + + for (i = 0; i < n; i++) + kevent100_to_kevent(buf + i, changes + i); + +leave: + kmem_free(buf, buf_size); + return error; +} + +static int +compat_100___kevent50_put_events(void *ctx, struct kevent *events, + struct kevent *eventlist, size_t index, int n) +{ + int error, i; + struct kevent100 *buf; + const size_t buf_size = sizeof(*buf) * n; + struct kevent100 *eventlist100 = (struct kevent100 *)eventlist; + + KASSERT(n >= 0); + + buf = kmem_alloc(buf_size, KM_SLEEP); + + for (i = 0; i < n; i++) + kevent_to_kevent100(events + i, buf + i); + + error = copyout(buf, eventlist100 + index, buf_size); + + kmem_free(buf, buf_size); + return error; +} +#endif /* _KERNEL */ + __BEGIN_DECLS -int kevent(int, const struct kevent *, size_t, struct kevent *, size_t, - const struct timespec50 *); -int __kevent50(int, const struct kevent *, size_t, struct kevent *, size_t, - const struct timespec *); +int kevent(int, const struct kevent100 *, size_t, struct kevent100 *, + size_t, const struct timespec50 *); +int __kevent50(int, const struct kevent100 *, size_t, struct kevent100 *, + size_t, const struct timespec *); +int __kevent100(int, const struct kevent *, size_t, struct kevent *, + size_t, const struct timespec *); __END_DECLS #endif /* !_COMPAT_SYS_EVENT_H_ */ Index: src/sys/kern/files.kern diff -u src/sys/kern/files.kern:1.59 src/sys/kern/files.kern:1.60 --- src/sys/kern/files.kern:1.59 Mon Jul 10 02:42:33 2023 +++ src/sys/kern/files.kern Fri Jul 28 14:19:01 2023 @@ -1,4 +1,4 @@ -# $NetBSD: files.kern,v 1.59 2023/07/10 06:42:33 mrg Exp $ +# $NetBSD: files.kern,v 1.60 2023/07/28 18:19:01 christos Exp $ # # kernel sources @@ -161,6 +161,7 @@ file kern/subr_workqueue.c kern file kern/subr_xcall.c kern file kern/sys_aio.c aio file kern/sys_descrip.c kern +file kern/sys_epoll.c kern file kern/sys_eventfd.c kern file kern/sys_futex.c kern file kern/sys_generic.c kern Index: 
src/sys/kern/kern_event.c diff -u src/sys/kern/kern_event.c:1.148 src/sys/kern/kern_event.c:1.149 --- src/sys/kern/kern_event.c:1.148 Sat Apr 22 09:52:54 2023 +++ src/sys/kern/kern_event.c Fri Jul 28 14:19:01 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_event.c,v 1.148 2023/04/22 13:52:54 riastradh Exp $ */ +/* $NetBSD: kern_event.c,v 1.149 2023/07/28 18:19:01 christos Exp $ */ /*- * Copyright (c) 2008, 2009, 2021 The NetBSD Foundation, Inc. @@ -63,7 +63,7 @@ #endif /* _KERNEL_OPT */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.148 2023/04/22 13:52:54 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.149 2023/07/28 18:19:01 christos Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -1785,7 +1785,7 @@ static const struct kevent_ops kevent_na }; int -sys___kevent50(struct lwp *l, const struct sys___kevent50_args *uap, +sys___kevent100(struct lwp *l, const struct sys___kevent100_args *uap, register_t *retval) { /* { Index: src/sys/kern/makesyscalls.sh diff -u src/sys/kern/makesyscalls.sh:1.186 src/sys/kern/makesyscalls.sh:1.187 --- src/sys/kern/makesyscalls.sh:1.186 Thu Oct 21 07:01:03 2021 +++ src/sys/kern/makesyscalls.sh Fri Jul 28 14:19:01 2023 @@ -1,4 +1,4 @@ -# $NetBSD: makesyscalls.sh,v 1.186 2021/10/21 11:01:03 andvar Exp $ +# $NetBSD: makesyscalls.sh,v 1.187 2023/07/28 18:19:01 christos Exp $ # # Copyright (c) 1994, 1996, 2000 Christopher G. Demetriou # All rights reserved. 
@@ -419,6 +419,7 @@ NR == 1 { uncompattypes["struct timeval50"] = "struct timeval"; uncompattypes["struct timespec50"] = "struct timespec"; uncompattypes["struct stat30"] = "struct stat"; + uncompattypes["struct kevent100"] = "struct kevent"; next } Index: src/sys/kern/syscalls.conf diff -u src/sys/kern/syscalls.conf:1.31 src/sys/kern/syscalls.conf:1.32 --- src/sys/kern/syscalls.conf:1.31 Sat May 16 14:31:50 2020 +++ src/sys/kern/syscalls.conf Fri Jul 28 14:19:01 2023 @@ -1,4 +1,4 @@ -# $NetBSD: syscalls.conf,v 1.31 2020/05/16 18:31:50 christos Exp $ +# $NetBSD: syscalls.conf,v 1.32 2023/07/28 18:19:01 christos Exp $ sysnames="syscalls.c" sysnumhdr="../sys/syscall.h" @@ -11,7 +11,7 @@ sysalign=1 rumpcalls="../rump/librump/rumpkern/rump_syscalls.c" rumpcallshdr="../rump/include/rump/rump_syscalls.h" rumpsysmap="../rump/rump.sysmap" -compatopts="compat_43 compat_09 compat_10 compat_11 compat_12 compat_13 compat_14 compat_15 compat_16 compat_20 compat_30 compat_40 compat_50 compat_60 compat_70 compat_80 compat_90" +compatopts="compat_43 compat_09 compat_10 compat_11 compat_12 compat_13 compat_14 compat_15 compat_16 compat_20 compat_30 compat_40 compat_50 compat_60 compat_70 compat_80 compat_90 compat_100" libcompatopts="" switchname="sysent" Index: src/sys/kern/syscalls.master diff -u src/sys/kern/syscalls.master:1.310 src/sys/kern/syscalls.master:1.311 --- src/sys/kern/syscalls.master:1.310 Sun Jul 9 22:33:04 2023 +++ src/sys/kern/syscalls.master Fri Jul 28 14:19:01 2023 @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.310 2023/07/10 02:33:04 christos Exp $ + $NetBSD: syscalls.master,v 1.311 2023/07/28 18:19:01 christos Exp $ ; @(#)syscalls.master 8.2 (Berkeley) 1/13/94 @@ -705,8 +705,8 @@ 343 STD { int|sys||rasctl(void *addr, size_t len, int op); } 344 STD RUMP { int|sys||kqueue(void); } 345 COMPAT_50 MODULAR compat_50 RUMP { int|sys||kevent(int fd, \ - const struct kevent *changelist, size_t nchanges, \ - struct kevent *eventlist, size_t nevents, \ + const struct 
kevent100 *changelist, size_t nchanges, \ + struct kevent100 *eventlist, size_t nevents, \ const struct timespec50 *timeout); } ; Scheduling system calls. @@ -912,9 +912,10 @@ { int|sys||_lwp_park(const struct timespec *ts, \ lwpid_t unpark, const void *hint, \ const void *unparkhint); } -435 STD RUMP { int|sys|50|kevent(int fd, \ - const struct kevent *changelist, size_t nchanges, \ - struct kevent *eventlist, size_t nevents, \ +435 COMPAT_100 MODULAR compat_100 RUMP \ + { int|sys|50|kevent(int fd, \ + const struct kevent100 *changelist, size_t nchanges, \ + struct kevent100 *eventlist, size_t nevents, \ const struct timespec *timeout); } 436 STD RUMP { int|sys|50|pselect(int nd, fd_set *in, fd_set *ou, \ fd_set *ex, const struct timespec *ts, \ @@ -1051,3 +1052,14 @@ 499 STD RUMP { long|sys||lpathconf(const char *path, int name); } 500 STD { int|sys||memfd_create(const char *name, \ unsigned int flags); } +501 STD RUMP { int|sys|100|kevent(int fd, \ + const struct kevent *changelist, size_t nchanges, \ + struct kevent *eventlist, size_t nevents, \ + const struct timespec *timeout); } +502 STD { int|sys||epoll_create1(int flags); } +503 STD { int|sys||epoll_ctl(int epfd, int op, int fd, \ + struct epoll_event *event); } +504 STD { int|sys||epoll_pwait2(int epfd, \ + struct epoll_event *events, int maxevents, \ + const struct timespec *timeout, \ + const sigset_t *sigmask); } Index: src/sys/rump/Makefile.rump diff -u src/sys/rump/Makefile.rump:1.134 src/sys/rump/Makefile.rump:1.135 --- src/sys/rump/Makefile.rump:1.134 Tue May 3 04:34:00 2022 +++ src/sys/rump/Makefile.rump Fri Jul 28 14:19:01 2023 @@ -1,4 +1,4 @@ -# $NetBSD: Makefile.rump,v 1.134 2022/05/03 08:34:00 hannken Exp $ +# $NetBSD: Makefile.rump,v 1.135 2023/07/28 18:19:01 christos Exp $ # .if !defined(_RUMP_MK) @@ -48,7 +48,7 @@ CPPFLAGS+= -DMIPS1=1 # which NetBSD compat to build RUMP_NBCOMPAT?=default .if ${RUMP_NBCOMPAT} == "all" || ${RUMP_NBCOMPAT} == "default" -RUMP_NBCOMPAT= 50 60 70 80 90 
+RUMP_NBCOMPAT= 50 60 70 80 90 100 .endif .if ${RUMP_NBCOMPAT} == "none" RUMP_NBCOMPAT= Index: src/sys/rump/rump.sysmap diff -u src/sys/rump/rump.sysmap:1.9 src/sys/rump/rump.sysmap:1.10 --- src/sys/rump/rump.sysmap:1.9 Mon Nov 2 13:56:16 2020 +++ src/sys/rump/rump.sysmap Fri Jul 28 14:19:01 2023 @@ -178,7 +178,7 @@ 428 sys___clock_settime50 __clock_settime50 rump___sysimpl_clock_settime50 429 sys___clock_getres50 __clock_getres50 rump___sysimpl_clock_getres50 430 sys___nanosleep50 __nanosleep50 rump___sysimpl_nanosleep50 -435 sys___kevent50 __kevent50 rump___sysimpl_kevent50 +435 sys_nomodule __kevent50 rump___sysimpl_kevent50 436 sys___pselect50 __pselect50 rump___sysimpl_pselect50 437 sys___pollts50 __pollts50 rump___sysimpl_pollts50 438 sys_nomodule __aio_suspend50 rump___sysimpl_aio_suspend50 @@ -220,3 +220,4 @@ 485 sys___fstatvfs190 __fstatvfs190 rump___sysimpl_fstatvfs190 486 sys___fhstatvfs190 __fhstatvfs190 rump___sysimpl_fhstatvfs190 499 sys_lpathconf lpathconf rump___sysimpl_lpathconf +501 sys___kevent100 __kevent100 rump___sysimpl_kevent100 Index: src/sys/sys/Makefile diff -u src/sys/sys/Makefile:1.180 src/sys/sys/Makefile:1.181 --- src/sys/sys/Makefile:1.180 Sun Oct 10 09:03:10 2021 +++ src/sys/sys/Makefile Fri Jul 28 14:19:01 2023 @@ -1,4 +1,4 @@ -# $NetBSD: Makefile,v 1.180 2021/10/10 13:03:10 jmcneill Exp $ +# $NetBSD: Makefile,v 1.181 2023/07/28 18:19:01 christos Exp $ .include <bsd.own.mk> @@ -18,7 +18,7 @@ INCS= acct.h acl.h agpio.h aio.h ansi.h dir.h dirent.h \ disk.h disklabel.h disklabel_acorn.h disklabel_gpt.h disklabel_rdb.h \ dkbad.h dkio.h dkstat.h domain.h drvctlio.h dvdio.h \ - efiio.h endian.h envsys.h errno.h evcnt.h event.h eventfd.h exec.h \ + efiio.h endian.h envsys.h epoll.h errno.h evcnt.h event.h eventfd.h exec.h \ exec_aout.h exec_coff.h exec_ecoff.h exec_elf.h exec_script.h \ extattr.h extent.h \ fault.h \ Index: src/sys/sys/event.h diff -u src/sys/sys/event.h:1.54 src/sys/sys/event.h:1.55 --- src/sys/sys/event.h:1.54 Mon Jul 
18 20:46:00 2022 +++ src/sys/sys/event.h Fri Jul 28 14:19:01 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: event.h,v 1.54 2022/07/19 00:46:00 thorpej Exp $ */ +/* $NetBSD: event.h,v 1.55 2023/07/28 18:19:01 christos Exp $ */ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon <jle...@freebsd.org> @@ -70,6 +70,7 @@ struct kevent { uint32_t fflags; /* filter flag value */ int64_t data; /* filter data value */ void *udata; /* opaque user data identifier */ + uint64_t ext[4]; /* extensions */ }; static __inline void @@ -349,7 +350,7 @@ int kqueue(void); int kqueue1(int); #ifndef __LIBC12_SOURCE__ int kevent(int, const struct kevent *, size_t, struct kevent *, size_t, - const struct timespec *) __RENAME(__kevent50); + const struct timespec *) __RENAME(__kevent100); #endif #endif /* !_POSIX_C_SOURCE */ __END_DECLS Index: src/sys/sys/syscall.h diff -u src/sys/sys/syscall.h:1.322 src/sys/sys/syscall.h:1.323 --- src/sys/sys/syscall.h:1.322 Sun Jul 9 22:37:05 2023 +++ src/sys/sys/syscall.h Fri Jul 28 14:19:01 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: syscall.h,v 1.322 2023/07/10 02:37:05 christos Exp $ */ +/* $NetBSD: syscall.h,v 1.323 2023/07/28 18:19:01 christos Exp $ */ /* * System call numbers. 
@@ -954,7 +954,7 @@ /* syscall: "kqueue" ret: "int" args: */ #define SYS_kqueue 344 -/* syscall: "compat_50_kevent" ret: "int" args: "int" "const struct kevent *" "size_t" "struct kevent *" "size_t" "const struct timespec50 *" */ +/* syscall: "compat_50_kevent" ret: "int" args: "int" "const struct kevent100 *" "size_t" "struct kevent100 *" "size_t" "const struct timespec50 *" */ #define SYS_compat_50_kevent 345 /* syscall: "_sched_setparam" ret: "int" args: "pid_t" "lwpid_t" "int" "const struct sched_param *" */ @@ -1207,8 +1207,8 @@ /* syscall: "compat_60__lwp_park" ret: "int" args: "const struct timespec *" "lwpid_t" "const void *" "const void *" */ #define SYS_compat_60__lwp_park 434 -/* syscall: "__kevent50" ret: "int" args: "int" "const struct kevent *" "size_t" "struct kevent *" "size_t" "const struct timespec *" */ -#define SYS___kevent50 435 +/* syscall: "compat_100___kevent50" ret: "int" args: "int" "const struct kevent100 *" "size_t" "struct kevent100 *" "size_t" "const struct timespec *" */ +#define SYS_compat_100___kevent50 435 /* syscall: "__pselect50" ret: "int" args: "int" "fd_set *" "fd_set *" "fd_set *" "const struct timespec *" "const sigset_t *" */ #define SYS___pselect50 436 @@ -1407,6 +1407,18 @@ /* syscall: "memfd_create" ret: "int" args: "const char *" "unsigned int" */ #define SYS_memfd_create 500 -#define SYS_MAXSYSCALL 501 +/* syscall: "__kevent100" ret: "int" args: "int" "const struct kevent *" "size_t" "struct kevent *" "size_t" "const struct timespec *" */ +#define SYS___kevent100 501 + +/* syscall: "epoll_create1" ret: "int" args: "int" */ +#define SYS_epoll_create1 502 + +/* syscall: "epoll_ctl" ret: "int" args: "int" "int" "int" "struct epoll_event *" */ +#define SYS_epoll_ctl 503 + +/* syscall: "epoll_pwait2" ret: "int" args: "int" "struct epoll_event *" "int" "const struct timespec *" "const sigset_t *" */ +#define SYS_epoll_pwait2 504 + +#define SYS_MAXSYSCALL 505 #define SYS_NSYSENT 512 #endif /* _SYS_SYSCALL_H_ */ Index: 
src/tests/kernel/Makefile diff -u src/tests/kernel/Makefile:1.72 src/tests/kernel/Makefile:1.73 --- src/tests/kernel/Makefile:1.72 Sat Jun 3 17:28:52 2023 +++ src/tests/kernel/Makefile Fri Jul 28 14:19:01 2023 @@ -1,4 +1,4 @@ -# $NetBSD: Makefile,v 1.72 2023/06/03 21:28:52 lukem Exp $ +# $NetBSD: Makefile,v 1.73 2023/07/28 18:19:01 christos Exp $ NOMAN= # defined @@ -7,7 +7,8 @@ NOMAN= # defined TESTSDIR= ${TESTSBASE}/kernel TESTS_SUBDIRS+= kqueue -TESTS_C= t_fcntl +TESTS_C= t_epoll +TESTS_C+= t_fcntl TESTS_C+= t_lock TESTS_C+= t_lockf TESTS_C+= t_pty Added files: Index: src/lib/libc/sys/epoll.2 diff -u /dev/null src/lib/libc/sys/epoll.2:1.1 --- /dev/null Fri Jul 28 14:19:02 2023 +++ src/lib/libc/sys/epoll.2 Fri Jul 28 14:19:00 2023 @@ -0,0 +1,388 @@ +.\" $NetBSD: epoll.2,v 1.1 2023/07/28 18:19:00 christos Exp $ +.\" +.\" Copyright (c) 2023 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" This code is derived from software contributed to The NetBSD Foundation +.\" by Theodore Preduta. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd July 19, 2023 +.Dt EPOLL 2 +.Os +.Sh NAME +.Nm epoll , +.Nm epoll_event , +.Nm epoll_data , +.Nm epoll_data_t , +.Nm epoll_create , +.Nm epoll_create1 , +.Nm epoll_ctl , +.Nm epoll_wait , +.Nm epoll_pwait , +.Nm epoll_pwait2 +.Nd event notification mechanism +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/epoll.h +.Bd -literal +union epoll_data { + void *ptr; + int fd; + uint32_t u32; + uint64_t u64; +}; + +typedef union epoll_data epoll_data_t; + +struct epoll_event { + uint32_t events; + epoll_data_t data; +}; +.Ed +.Pp +.Ft int +.Fn epoll_create "int size" +.Ft int +.Fn epoll_create1 "int flags" +.Ft int +.Fn epoll_ctl "int epfd" "int op" "int fd" "struct epoll_event *event" +.Ft int +.Fn epoll_wait "int epfd" "struct epoll_event *events" "int maxevents" "int timeout" +.Ft int +.Fn epoll_pwait "int epfd" "struct epoll_event *events" "int maxevents" "int timeout" "const sigset_t *sigmask" +.Ft int +.Fn epoll_pwait2 "int epfd" "struct epoll_event *events" "int maxevents" "const struct timespec *timeout" "const sigset_t *sigmask" +.Sh DESCRIPTION +.Nm +provides a similar facility to both +.Xr select 2 +and +.Xr kqueue 2 : +it allows for the examination of file descriptors to see if they are available +for reading/writing. +.Pp +The +.Va epoll_event +structure consists of two fields, +.Va events +and +.Va data . +The +.Va data +field is passed through the kernel and is intended to be used to identify the +event. 
+When used with +.Fa epoll_ctl , +the +.Va events +field consists of a mask of the events that the +.Nm +instance should watch for, and when being used with +.Fa epoll_wait , +.Fa epoll_pwait , +and +.Fa epoll_pwait2 +consists of a mask of the events that occurred. +The following are possible values for +.Va events : +.Bl -tag -width EPOLLONESHOT +.It Dv EPOLLIN +Watch for +.Xr read 2 +operations. +.It Dv EPOLLOUT +Watch for +.Xr write 2 +operations. +.It Dv EPOLLRDHUP +Watch for a peer closed connection. +.It Dv EPOLLERR +Watch for error conditions. +.It Dv EPOLLET +This option modifies the other set bits of +.Va events . +When set, the events described by other bits in +.Va events +are only triggered when the state changes. +Otherwise the events are considered triggered whenever the condition is true. +.It Dv EPOLLONESHOT +Remove this event after it has been retrieved once. +.El +.Pp +.Fn epoll_create +and +.Fn epoll_create1 +both create an +.Nm +instance. +The +.Fa size +argument specified for +.Fn epoll_create +exists so that the +.Nx +function has the same signature as the Linux system call of the same name. +.Fa size +must be positive, but is otherwise unused. +Additionally, optionally, +.Dv EPOLL_CLOEXEC +may be specified in the +.Fa flags +of +.Fn epoll_create1 +to set the +.Xr close 2 +on +.Xr exec 2 +flag. +.Pp +.Fn epoll_ctl +is used to make changes to the given +.Nm +instance based on the provided +.Fa op . +Possible values for +.Fa op +are: +.Bl -tag -width EPOLL_CTL_ADD +.It Dv EPOLL_CTL_ADD +Register interest of +.Fa fd +on the +.Fa epfd +for the events specified in +.Fa event . +.It Dv EPOLL_CTL_MOD +Modify the events registered for +.Fa fd +to those specified in +.Fa event . +.It Dv EPOLL_CTL_DEL +Deregister +.Fa fd +from the +.Nm +instance specified in +.Fa epfd . +Note that +.Fa event +is completely ignored in this case.
+.El +.Pp +.Fn epoll_wait , +.Fn epoll_pwait , +and +.Fn epoll_pwait2 +provide the ability to wait for up to +.Fa maxevents +events, which are stored in the buffer pointed to by +.Fa events . +For +.Fn epoll_wait +and +.Fn epoll_pwait , +a timeout may be specified in +.Fa timeout +in milliseconds. +If no timeout is desired, -1 should be specified in +.Fa timeout . +For +.Fn epoll_pwait2 +if no timeout is desired +.Fa timeout +should be specified as +.Dv NULL . +Additionally, +a sigmask may be specified to +.Fa epoll_pwait +and +.Fa epoll_pwait2 +in +.Fa sigmask +to set the sigmask while +.Nm +waits for events. +.Pp +Note that +.Nm +is not intended to be used by native +.Nx +applications. +Instead it is only intended to be used as a means to help port software originally +written for Linux to +.Nx . +.Sh RETURN VALUES +.Fn epoll_create +and +.Fn epoll_create1 +both return a file descriptor when successful. +.Pp +.Fn epoll_ctl +returns zero when successful. +.Pp +.Fn epoll_wait , +.Fn epoll_pwait , +and +.Fn epoll_pwait2 +return the number of events written to +.Fa events +when successful. +Note that zero is returned when +.Fa timeout +expires and no events were available. +.Pp +When any of the above fail, -1 is returned and +.Fa errno +is set. +.Sh ERRORS +The +.Fn epoll_create +and +.Fn epoll_create1 +functions fail if: +.Bl -tag -width Er +.It Bq Er EINVAL +.Fa size +is not positive. +.Pp +Bits other than +.Dv EPOLL_CLOEXEC +are provided in +.Fa flags . +.It Bq Er EMFILE +The per-process descriptor table is full. +.It Bq Er ENFILE +The system file table is full. +.It Bq Er ENOMEM +The kernel failed to allocate enough memory for a +.Nm +instance. +.El +.Pp +The +.Fn epoll_ctl +function fails if: +.Bl -tag -width Er +.It Bq Er EBADF +.Fa epfd +or +.Fa fd +is not a valid file descriptor. +.It Bq Er EEXIST +.Fa op +is +.Dv EPOLL_CTL_ADD +and +.Fa fd +was already previously added via +.Dv EPOLL_CTL_ADD .
+.It Bq Er EINVAL +.Fa epfd +is not a file descriptor for an +.Nm +instance. +.Pp +.Fa epfd +and +.Fa fd +represent the same +.Nm +instance. +.It Bq Er ELOOP +.Fa op +is +.Dv EPOLL_CTL_ADD +and adding +.Fa fd +would result in a circular loop of +.Nm +instances. +.It Bq Er ENOENT +.Fa op +is +.Dv EPOLL_CTL_MOD +or +.Dv EPOLL_CTL_DEL +and +.Fa fd +was not previously added with +.Dv EPOLL_CTL_ADD . +.It Bq Er ENOMEM +The kernel failed to allocate enough memory for +.Fa op . +.It Bq Er EPERM +.Fa fd +does not support +.Nm epoll . +.El +.Pp +The +.Fn epoll_wait , +.Fn epoll_pwait , +and +.Fn epoll_pwait2 +functions fail if: +.Bl -tag -width Er +.It Bq Er EBADF +.Fa epfd +is not a valid file descriptor. +.It Bq Er EFAULT +The area provided in +.Fa events +failed to be written to. +.It Bq Er EINTR +A signal was delivered before any events became available and +.Fa timeout +expired. +.It Bq Er EINVAL +.Fa epfd +is not a valid +.Nm +file descriptor. +.Pp +.Fa maxevents +is less than or equal to zero. +.El +.Sh SEE ALSO +.Xr kqueue 2 , +.Xr poll 2 , +.Xr select 2 +.Sh HISTORY +The +.Nm +functions and types are designed to be compatible with the Linux system calls of +the same name. +.Sh CAVEATS +The +.Nm +facility is not intended to be used in conjunction with +.Xr kqueue 2 . +.Pp +Unlike Linux's +.Nm , +the +.Nx +version does not survive a +.Xr fork 2 . Index: src/lib/libc/sys/epoll.c diff -u /dev/null src/lib/libc/sys/epoll.c:1.1 --- /dev/null Fri Jul 28 14:19:02 2023 +++ src/lib/libc/sys/epoll.c Fri Jul 28 14:19:00 2023 @@ -0,0 +1,69 @@ +/* $NetBSD: epoll.c,v 1.1 2023/07/28 18:19:00 christos Exp $ */ + +/*- + * Copyright (c) 2023 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include <sys/cdefs.h> +__RCSID("$NetBSD: epoll.c,v 1.1 2023/07/28 18:19:00 christos Exp $"); + +#include <sys/epoll.h> +#include <sys/sigtypes.h> +#include <sys/time.h> + +#include <errno.h> +#include <stddef.h> + +int +epoll_create(int size) +{ + if (size <= 0) { + errno = EINVAL; + return -1; + } + + return epoll_create1(0); +} + +int +epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout) +{ + return epoll_pwait(epfd, events, maxevents, timeout, NULL); +} + +int +epoll_pwait(int epfd, struct epoll_event *events, int maxevents, int timeout, + const sigset_t *sigmask) +{ + struct timespec ts, *tsp; + + if (timeout >= 0) { + ts.tv_sec = timeout / 1000; + ts.tv_nsec = (timeout % 1000) * 1000000; + tsp = &ts; + } else + tsp = NULL; + + return epoll_pwait2(epfd, events, maxevents, tsp, sigmask); +} Index: src/sys/compat/common/kern_event_100.c diff -u /dev/null src/sys/compat/common/kern_event_100.c:1.1 --- /dev/null Fri Jul 28 14:19:02 2023 +++ src/sys/compat/common/kern_event_100.c Fri Jul 28 14:19:00 2023 @@ -0,0 +1,88 @@ +/* $NetBSD: kern_event_100.c,v 1.1 2023/07/28 18:19:00 christos Exp $ */ + +/*- + * Copyright (c) 2023 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#include <sys/cdefs.h> +__KERNEL_RCSID(0, "$NetBSD: kern_event_100.c,v 1.1 2023/07/28 18:19:00 christos Exp $"); + +#if defined(_KERNEL_OPT) +#include "opt_compat_netbsd.h" +#endif + +#include <sys/param.h> +#include <sys/event.h> +#include <sys/syscall.h> +#include <sys/syscallvar.h> +#include <sys/syscallargs.h> + +#include <compat/common/compat_mod.h> +#include <compat/sys/event.h> + +static const struct syscall_package kern_event_100_syscalls[] = { + { SYS_compat_100___kevent50, 0, + (sy_call_t *)compat_100_sys___kevent50 }, + { 0, 0, NULL }, +}; + +int +compat_100_sys___kevent50(struct lwp *l, + const struct compat_100_sys___kevent50_args *uap, + register_t *retval) +{ + /* { + syscallarg(int) fd; + syscallarg(const struct kevent100 *) changelist; + syscallarg(size_t) nchanges; + syscallarg(struct kevent100 *) eventlist; + syscallarg(size_t) nevents; + syscallarg(const struct timespec *) timeout; + } */ + static const struct kevent_ops compat_100_kevent_ops = { + .keo_private = NULL, + .keo_fetch_timeout = copyin, + .keo_fetch_changes = compat_100___kevent50_fetch_changes, + .keo_put_events = compat_100___kevent50_put_events, + }; + + return kevent1(retval, SCARG(uap, fd), + (const struct kevent *)SCARG(uap, changelist), 
SCARG(uap, nchanges), + (struct kevent *)SCARG(uap, eventlist), SCARG(uap, nevents), + SCARG(uap, timeout), &compat_100_kevent_ops); +} + +int +kern_event_100_init(void) +{ + + return syscall_establish(NULL, kern_event_100_syscalls); +} + +int +kern_event_100_fini(void) +{ + + return syscall_disestablish(NULL, kern_event_100_syscalls); +} Index: src/sys/kern/sys_epoll.c diff -u /dev/null src/sys/kern/sys_epoll.c:1.1 --- /dev/null Fri Jul 28 14:19:02 2023 +++ src/sys/kern/sys_epoll.c Fri Jul 28 14:19:01 2023 @@ -0,0 +1,680 @@ +/* $NetBSD: sys_epoll.c,v 1.1 2023/07/28 18:19:01 christos Exp $ */ + +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2007 Roman Divacky + * Copyright (c) 2014 Dmitry Chagin <dcha...@freebsd.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include <sys/cdefs.h> +__KERNEL_RCSID(0, "$NetBSD: sys_epoll.c,v 1.1 2023/07/28 18:19:01 christos Exp $"); + + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/bitops.h> +#include <sys/epoll.h> +#include <sys/event.h> +#include <sys/eventvar.h> +#include <sys/errno.h> +#include <sys/file.h> +#include <sys/filedesc.h> +#include <sys/fcntl.h> +#include <sys/proc.h> +#include <sys/signal.h> +#include <sys/vnode.h> + +#include <sys/syscallargs.h> + +#define EPOLL_MAX_DEPTH 5 + +#define EPOLL_EVRD (EPOLLIN|EPOLLRDNORM) +#define EPOLL_EVWR (EPOLLOUT|EPOLLWRNORM) +#define EPOLL_EVSUP (EPOLLET|EPOLLONESHOT|EPOLLHUP|EPOLLERR|EPOLLPRI \ + |EPOLL_EVRD|EPOLL_EVWR|EPOLLRDHUP) + +#define kext_data ext[0] +#define kext_epfd ext[1] +#define kext_fd ext[2] + +#if DEBUG +#define DPRINTF(x) uprintf x +#else +#define DPRINTF(x) __nothing +#endif + +struct epoll_edge { + int epfd; + int fd; +}; + +__BITMAP_TYPE(epoll_seen, char, 1); + +static int epoll_to_kevent(int, int, struct epoll_event *, struct kevent *, + int *); +static void kevent_to_epoll(struct kevent *, struct epoll_event *); +static int epoll_kev_put_events(void *, struct kevent *, struct kevent *, + size_t, int); +static int epoll_kev_fetch_changes(void *, const struct kevent *, + struct kevent *, size_t, int); +static int epoll_kev_fetch_timeout(const void *, void *, size_t); +static int epoll_register_kevent(register_t *, int, int, int, + unsigned int); +static int 
epoll_fd_registered(register_t *, int, int); +static int epoll_delete_all_events(register_t *, int, int); +static int epoll_recover_watch_tree(struct epoll_edge *, size_t, size_t); +static int epoll_dfs(struct epoll_edge *, size_t, struct epoll_seen *, + size_t, int, int); +static int epoll_check_loop_and_depth(struct lwp *, int, int); + +/* + * epoll_create1(2). Parse the flags and then create a kqueue instance. + */ +int +sys_epoll_create1(struct lwp *l, const struct sys_epoll_create1_args *uap, + register_t *retval) +{ + /* { + syscallarg(int) flags; + } */ + struct sys_kqueue1_args kqa; + + if ((SCARG(uap, flags) & ~(O_CLOEXEC)) != 0) + return EINVAL; + + SCARG(&kqa, flags) = SCARG(uap, flags); + + return sys_kqueue1(l, &kqa, retval); +} + +/* + * Structure converting function from epoll to kevent. + */ +static int +epoll_to_kevent(int epfd, int fd, struct epoll_event *l_event, + struct kevent *kevent, int *nkevents) +{ + uint32_t levents = l_event->events; + uint32_t kev_flags = EV_ADD | EV_ENABLE; + + /* flags related to how event is registered */ + if ((levents & EPOLLONESHOT) != 0) + kev_flags |= EV_DISPATCH; + if ((levents & EPOLLET) != 0) + kev_flags |= EV_CLEAR; + if ((levents & EPOLLERR) != 0) + kev_flags |= EV_ERROR; + if ((levents & EPOLLRDHUP) != 0) + kev_flags |= EV_EOF; + + /* flags related to what event is registered */ + if ((levents & EPOLL_EVRD) != 0) { + EV_SET(kevent, fd, EVFILT_READ, kev_flags, 0, 0, 0); + kevent->kext_data = l_event->data; + kevent->kext_epfd = epfd; + kevent->kext_fd = fd; + ++kevent; + ++(*nkevents); + } + if ((levents & EPOLL_EVWR) != 0) { + EV_SET(kevent, fd, EVFILT_WRITE, kev_flags, 0, 0, 0); + kevent->kext_data = l_event->data; + kevent->kext_epfd = epfd; + kevent->kext_fd = fd; + ++kevent; + ++(*nkevents); + } + /* zero event mask is legal */ + if ((levents & (EPOLL_EVRD | EPOLL_EVWR)) == 0) { + EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0); + ++(*nkevents); + } + + if ((levents & ~(EPOLL_EVSUP)) != 
0) { + return EINVAL; + } + + return 0; +} + +/* + * Structure converting function from kevent to epoll. In a case + * this is called on error in registration we store the error in + * event->data and pick it up later in sys_epoll_ctl(). + */ +static void +kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) +{ + + l_event->data = kevent->kext_data; + + if ((kevent->flags & EV_ERROR) != 0) { + l_event->events = EPOLLERR; + return; + } + + /* XXX EPOLLPRI, EPOLLHUP */ + switch (kevent->filter) { + case EVFILT_READ: + l_event->events = EPOLLIN; + if ((kevent->flags & EV_EOF) != 0) + l_event->events |= EPOLLRDHUP; + break; + case EVFILT_WRITE: + l_event->events = EPOLLOUT; + break; + default: + DPRINTF(("%s: unhandled kevent filter %d\n", __func__, + kevent->filter)); + break; + } +} + +/* + * Copyout callback used by kevent. This converts kevent events to + * epoll events that are located in args->eventlist. + */ +static int +epoll_kev_put_events(void *ctx, struct kevent *events, + struct kevent *eventlist, size_t index, int n) +{ + int i; + struct epoll_event *eep = (struct epoll_event *)eventlist; + + KASSERT(n >= 0 && n < EPOLL_MAX_EVENTS); + + for (i = 0; i < n; i++) + kevent_to_epoll(events + i, eep + index + i); + + return 0; +} + +/* + * Copyin callback used by kevent. This copies already + * converted filters from kernel memory to the kevent + * internal kernel memory. Hence the memcpy instead of + * copyin. + */ +static int +epoll_kev_fetch_changes(void *ctx, const struct kevent *changelist, + struct kevent *changes, size_t index, int n) +{ + KASSERT(n >= 0 && n < EPOLL_MAX_EVENTS); + + memcpy(changes, changelist + index, n * sizeof(*changes)); + + return 0; +} + +/* + * Timer copy callback used by kevent. Copies a converted timeout + * from kernel memory to kevent memory. Hence the memcpy instead of + * just using copyin. 
+ */ +static int +epoll_kev_fetch_timeout(const void *src, void *dest, size_t size) +{ + memcpy(dest, src, size); + + return 0; +} + +/* + * Load epoll filter, convert it to kevent filter and load it into + * kevent subsystem. + * + * event must point to kernel memory or be NULL. + */ +int +epoll_ctl_common(struct lwp *l, register_t *retval, int epfd, int op, int fd, + struct epoll_event *event) +{ + struct kevent kev[2]; + struct kevent_ops k_ops = { + .keo_private = NULL, + .keo_fetch_timeout = NULL, + .keo_fetch_changes = epoll_kev_fetch_changes, + .keo_put_events = NULL, + }; + file_t *epfp, *fp; + int error = 0; + int nchanges = 0; + + /* + * Need to validate epfd and fd separately from kevent1 to match + * Linux's errno behaviour. + */ + epfp = fd_getfile(epfd); + if (epfp == NULL) + return EBADF; + if (epfp->f_type != DTYPE_KQUEUE) + error = EINVAL; + fd_putfile(epfd); + if (error != 0) + return error; + + fp = fd_getfile(fd); + if (fp == NULL) + return EBADF; + if (fp->f_type == DTYPE_VNODE) { + switch (fp->f_vnode->v_type) { + case VREG: + case VDIR: + case VBLK: + case VLNK: + error = EPERM; + break; + + default: + break; + } + } + fd_putfile(fd); + if (error != 0) + return error; + + /* Linux disallows spying on himself */ + if (epfd == fd) { + return EINVAL; + } + + if (op != EPOLL_CTL_DEL) { + error = epoll_to_kevent(epfd, fd, event, kev, &nchanges); + if (error != 0) + return error; + } + + switch (op) { + case EPOLL_CTL_MOD: + error = epoll_delete_all_events(retval, epfd, fd); + if (error != 0) + return error; + break; + + case EPOLL_CTL_ADD: + if (epoll_fd_registered(retval, epfd, fd)) + return EEXIST; + error = epoll_check_loop_and_depth(l, epfd, fd); + if (error != 0) + return error; + break; + + case EPOLL_CTL_DEL: + /* CTL_DEL means unregister this fd with this epoll */ + return epoll_delete_all_events(retval, epfd, fd); + + default: + DPRINTF(("%s: invalid op %d\n", ___func__, op)); + return EINVAL; + } + + error = kevent1(retval, epfd, kev, 
nchanges, NULL, 0, NULL, &k_ops); + + if (error == EOPNOTSUPP) { + error = EPERM; + } + + return error; +} + +/* + * epoll_ctl(2). Copyin event if necessary and then call + * epoll_ctl_common(). + */ +int +sys_epoll_ctl(struct lwp *l, const struct sys_epoll_ctl_args *uap, + register_t *retval) +{ + /* { + syscallarg(int) epfd; + syscallarg(int) op; + syscallarg(int) fd; + syscallarg(struct epoll_event *) event; + } */ + struct epoll_event ee; + struct epoll_event *eep; + int error; + + if (SCARG(uap, op) != EPOLL_CTL_DEL) { + error = copyin(SCARG(uap, event), &ee, sizeof(ee)); + if (error != 0) + return error; + + eep = ⅇ + } else + eep = NULL; + + return epoll_ctl_common(l, retval, SCARG(uap, epfd), SCARG(uap, op), + SCARG(uap, fd), eep); +} + +/* + * Wait for a filter to be triggered on the epoll file descriptor. + * All of the epoll_*wait* syscalls eventually end up here. + * + * events, nss, and ssp must point to kernel memory (or be NULL). + */ +int +epoll_wait_common(struct lwp *l, register_t *retval, int epfd, + struct epoll_event *events, int maxevents, struct timespec *tsp, + const sigset_t *nssp) +{ + struct kevent_ops k_ops = { + .keo_private = NULL, + .keo_fetch_timeout = epoll_kev_fetch_timeout, + .keo_fetch_changes = NULL, + .keo_put_events = epoll_kev_put_events, + }; + struct proc *p = l->l_proc; + file_t *epfp; + sigset_t oss; + int error = 0; + + if (maxevents <= 0 || maxevents > EPOLL_MAX_EVENTS) + return EINVAL; + + /* + * Need to validate epfd separately from kevent1 to match + * Linux's errno behaviour. 
+ */ + epfp = fd_getfile(epfd); + if (epfp == NULL) + return EBADF; + if (epfp->f_type != DTYPE_KQUEUE) + error = EINVAL; + fd_putfile(epfd); + if (error != 0) + return error; + + if (nssp != NULL) { + mutex_enter(p->p_lock); + error = sigprocmask1(l, SIG_SETMASK, nssp, &oss); + mutex_exit(p->p_lock); + if (error != 0) + return error; + } + + error = kevent1(retval, epfd, NULL, 0, (struct kevent *)events, + maxevents, tsp, &k_ops); + /* + * Since we're not registering nay events, ENOMEM should not + * be possible for this specific kevent1 call. + */ + KASSERT(error != ENOMEM); + + if (nssp != NULL) { + mutex_enter(p->p_lock); + error = sigprocmask1(l, SIG_SETMASK, &oss, NULL); + mutex_exit(p->p_lock); + } + + return error; +} + +/* + * epoll_pwait2(2). + */ +int +sys_epoll_pwait2(struct lwp *l, const struct sys_epoll_pwait2_args *uap, + register_t *retval) +{ + /* { + syscallarg(int) epfd; + syscallarg(struct epoll_event *) events; + syscallarg(int) maxevents; + syscallarg(struct timespec *) timeout; + syscallarg(sigset_t *) sigmask; + } */ + struct epoll_event *events; + struct timespec ts, *tsp; + sigset_t ss, *ssp; + int error; + const int maxevents = SCARG(uap, maxevents); + + if (maxevents <= 0 || maxevents >= EPOLL_MAX_EVENTS) + return EINVAL; + + if (SCARG(uap, timeout) != NULL) { + error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); + if (error != 0) + return error; + + tsp = &ts; + } else + tsp = NULL; + + if (SCARG(uap, sigmask) != NULL) { + error = copyin(SCARG(uap, sigmask), &ss, sizeof(ss)); + if (error != 0) + return error; + + ssp = &ss; + } else + ssp = NULL; + + events = kmem_alloc(maxevents * sizeof(*events), KM_SLEEP); + + error = epoll_wait_common(l, retval, SCARG(uap, epfd), events, + maxevents, tsp, ssp); + if (error == 0) + error = copyout(events, SCARG(uap, events), + *retval * sizeof(*events)); + + kmem_free(events, maxevents * sizeof(*events)); + return error; +} + +/* + * Helper that registers a single kevent. 
+ */ +static int +epoll_register_kevent(register_t *retval, int epfd, int fd, int filter, + unsigned int flags) +{ + struct kevent kev; + struct kevent_ops k_ops = { + .keo_private = NULL, + .keo_fetch_timeout = NULL, + .keo_fetch_changes = epoll_kev_fetch_changes, + .keo_put_events = NULL, + }; + + EV_SET(&kev, fd, filter, flags, 0, 0, 0); + + return kevent1(retval, epfd, &kev, 1, NULL, 0, NULL, &k_ops); +} + +/* + * Check if an fd is already registered in the kqueue referenced by epfd. + */ +static int +epoll_fd_registered(register_t *retval, int epfd, int fd) +{ + /* + * Set empty filter flags to avoid accidental modification of already + * registered events. In the case of event re-registration: + * 1. If event does not exists kevent() does nothing and returns ENOENT + * 2. If event does exists, it's enabled/disabled state is preserved + * but fflags, data and udata fields are overwritten. So we can not + * set socket lowats and store user's context pointer in udata. + */ + if (epoll_register_kevent(retval, epfd, fd, EVFILT_READ, 0) != ENOENT || + epoll_register_kevent(retval, epfd, fd, EVFILT_WRITE, 0) != ENOENT) + return 1; + + return 0; +} + +/* + * Remove all events in the kqueue referenced by epfd that depend on + * fd. + */ +static int +epoll_delete_all_events(register_t *retval, int epfd, int fd) +{ + int error1, error2; + + error1 = epoll_register_kevent(retval, epfd, fd, EVFILT_READ, + EV_DELETE); + error2 = epoll_register_kevent(retval, epfd, fd, EVFILT_WRITE, + EV_DELETE); + + /* return 0 if at least one result positive */ + return error1 == 0 ? 0 : error2; +} + +/* + * Interate through all the knotes and recover a directed graph on + * which kqueues are watching each other. + * + * If edges is NULL, the number of edges is still counted but no graph + * is assembled. 
+ */ +static int +epoll_recover_watch_tree(struct epoll_edge *edges, size_t nedges, size_t nfds) { + file_t *currfp, *targetfp; + struct knote *kn, *tmpkn; + size_t i, nedges_so_far = 0; + + for (i = 0; i < nfds && (edges == NULL || nedges_so_far < nedges); i++) + { + currfp = fd_getfile(i); + if (currfp == NULL) + continue; + if (currfp->f_type != DTYPE_KQUEUE) + goto continue_count_outer; + + SLIST_FOREACH_SAFE(kn, &currfp->f_kqueue->kq_sel.sel_klist, + kn_selnext, tmpkn) { + targetfp = fd_getfile(kn->kn_kevent.kext_epfd); + if (targetfp == NULL) + continue; + if (targetfp->f_type == DTYPE_KQUEUE) { + if (edges != NULL) { + edges[nedges_so_far].epfd = + kn->kn_kevent.kext_epfd; + edges[nedges_so_far].fd = + kn->kn_kevent.kext_fd; + } + nedges_so_far++; + } + + fd_putfile(kn->kn_kevent.kext_epfd); + } + +continue_count_outer: + fd_putfile(i); + } + + return nedges_so_far; +} + +/* + * Run dfs on the graph described by edges, checking for loops and a + * depth greater than EPOLL_MAX_DEPTH. + */ +static int +epoll_dfs(struct epoll_edge *edges, size_t nedges, struct epoll_seen *seen, + size_t nseen, int currfd, int depth) +{ + int error; + size_t i; + + KASSERT(edges != NULL); + KASSERT(seen != NULL); + KASSERT(nedges > 0); + KASSERT(currfd < nseen); + KASSERT(0 <= depth && depth <= EPOLL_MAX_DEPTH + 1); + + if (__BITMAP_ISSET(currfd, seen)) + return ELOOP; + + __BITMAP_SET(currfd, seen); + + depth++; + if (depth > EPOLL_MAX_DEPTH) + return EINVAL; + + for (i = 0; i < nedges; i++) { + if (edges[i].epfd != currfd) + continue; + + error = epoll_dfs(edges, nedges, seen, nseen, + edges[i].fd, depth); + if (error != 0) + return error; + } + + return 0; +} + +/* + * Check if adding fd to epfd would violate the maximum depth or + * create a loop. 
+ */ +static int +epoll_check_loop_and_depth(struct lwp *l, int epfd, int fd) +{ + int error; + file_t *fp; + struct epoll_edge *edges; + struct epoll_seen *seen; + size_t nedges, nfds, seen_size; + bool fdirrelevant; + + /* If the target isn't another kqueue, we can skip this check */ + fp = fd_getfile(fd); + if (fp == NULL) + return 0; + fdirrelevant = fp->f_type != DTYPE_KQUEUE; + fd_putfile(fd); + if (fdirrelevant) + return 0; + + nfds = l->l_proc->p_fd->fd_lastfile + 1; + + /* + * We call epoll_recover_watch_tree twice, once to find the + * number of edges, and once to actually fill them in. We add one + * because we want to include the edge epfd->fd. + */ + nedges = 1 + epoll_recover_watch_tree(NULL, 0, nfds); + + edges = kmem_zalloc(nedges * sizeof(*edges), KM_SLEEP); + + epoll_recover_watch_tree(edges + 1, nedges - 1, nfds); + + edges[0].epfd = epfd; + edges[0].fd = fd; + + seen_size = __BITMAP_SIZE(char, nfds); + seen = kmem_zalloc(seen_size, KM_SLEEP); + + error = epoll_dfs(edges, nedges, seen, nfds, epfd, 0); + + kmem_free(seen, seen_size); + kmem_free(edges, nedges * sizeof(*edges)); + + return error; +} Index: src/sys/sys/epoll.h diff -u /dev/null src/sys/sys/epoll.h:1.1 --- /dev/null Fri Jul 28 14:19:02 2023 +++ src/sys/sys/epoll.h Fri Jul 28 14:19:01 2023 @@ -0,0 +1,98 @@ +/* $NetBSD: epoll.h,v 1.1 2023/07/28 18:19:01 christos Exp $ */ + +/*- + * Copyright (c) 2007 Roman Divacky + * Copyright (c) 2014 Dmitry Chagin <dcha...@freebsd.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS_EPOLL_H_ +#define _SYS_EPOLL_H_ + +#include <sys/types.h> /* for uint32_t, uint64_t */ +#include <sys/sigtypes.h> /* for sigset_t */ +struct timespec; + +#define EPOLLIN 0x00000001 +#define EPOLLPRI 0x00000002 +#define EPOLLOUT 0x00000004 +#define EPOLLERR 0x00000008 +#define EPOLLHUP 0x00000010 +#define EPOLLRDNORM 0x00000040 +#define EPOLLRDBAND 0x00000080 +#define EPOLLWRNORM 0x00000100 +#define EPOLLWRBAND 0x00000200 +#define EPOLLMSG 0x00000400 +#define EPOLLRDHUP 0x00002000 +#define EPOLLWAKEUP 0x20000000 +#define EPOLLONESHOT 0x40000000 +#define EPOLLET 0x80000000 + +#define EPOLL_CTL_ADD 1 +#define EPOLL_CTL_DEL 2 +#define EPOLL_CTL_MOD 3 + +#ifdef _KERNEL +#define EPOLL_MAX_EVENTS (4 * 1024 * 1024) +typedef uint64_t epoll_data_t; +#else +union epoll_data { + void *ptr; + int fd; + uint32_t u32; + uint64_t u64; +}; + +typedef union epoll_data epoll_data_t; +#endif + +struct epoll_event { + uint32_t events; + epoll_data_t data; +}; + +#ifdef _KERNEL +int epoll_ctl_common(struct lwp *l, register_t *retval, int epfd, int op, + int fd, struct epoll_event *event); +int epoll_wait_common(struct lwp *l, register_t *retval, int epfd, + struct epoll_event 
*events, int maxevents, struct timespec *tsp, + const sigset_t *nss); +#else /* !_KERNEL */ +__BEGIN_DECLS +#ifdef _NETBSD_SOURCE +int epoll_create(int size); +int epoll_create1(int flags); +int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event); +int epoll_wait(int epfd, struct epoll_event *events, int maxevents, + int timeout); +int epoll_pwait(int epfd, struct epoll_event *events, int maxevents, + int timeout, const sigset_t *sigmask); +int epoll_pwait2(int epfd, struct epoll_event *events, int maxevents, + const struct timespec *timeout, const sigset_t *sigmask); +#endif /* _NETBSD_SOURCE */ +__END_DECLS +#endif /* !_KERNEL */ + +#endif /* !_SYS_EPOLL_H_ */ Index: src/tests/kernel/t_epoll.c diff -u /dev/null src/tests/kernel/t_epoll.c:1.1 --- /dev/null Fri Jul 28 14:19:02 2023 +++ src/tests/kernel/t_epoll.c Fri Jul 28 14:19:01 2023 @@ -0,0 +1,225 @@ +/* $NetBSD: t_epoll.c,v 1.1 2023/07/28 18:19:01 christos Exp $ */ + +/*- + * Copyright (c) 2023 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Theodore Preduta. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#include <sys/cdefs.h> +__RCSID("$NetBSD: t_epoll.c,v 1.1 2023/07/28 18:19:01 christos Exp $"); + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/epoll.h> +#include <errno.h> + +#include <atf-c.h> + +#include "h_macros.h" + +ATF_TC(create_size); +ATF_TC_HEAD(create_size, tc) +{ + + atf_tc_set_md_var(tc, "descr", + "Checks that epoll_create requires a non-positive size"); +} +ATF_TC_BODY(create_size, tc) +{ + ATF_REQUIRE_EQ_MSG(epoll_create(-1), -1, + "epoll_create succeeded unexpectedly"); + ATF_REQUIRE_ERRNO(EINVAL, true); + + ATF_REQUIRE_EQ_MSG(epoll_create(0), -1, + "epoll_create succeeded unexpectedly"); + ATF_REQUIRE_ERRNO(EINVAL, true); + + RL(epoll_create(1)); +} + +ATF_TC(bad_epfd); +ATF_TC_HEAD(bad_epfd, tc) +{ + + atf_tc_set_md_var(tc, "descr", + "Checks that epoll_ctl detects an invalid epfd"); +} +ATF_TC_BODY(bad_epfd, tc) +{ + int fd; + struct epoll_event event; + + RL(fd = epoll_create1(0)); + event.events = EPOLLIN; + + ATF_REQUIRE_EQ_MSG(epoll_ctl(-1, EPOLL_CTL_ADD, fd, &event), -1, + "epoll_ctl succeeded unexpectedly"); + ATF_REQUIRE_ERRNO(EBADF, true); +} + +ATF_TC(bad_fd); +ATF_TC_HEAD(bad_fd, tc) +{ + + atf_tc_set_md_var(tc, "descr", + "Checks that epoll_ctl detects an invalid fd"); +} +ATF_TC_BODY(bad_fd, tc) +{ + int epfd; + struct epoll_event event; + + RL(epfd = epoll_create1(0)); + event.events = EPOLLIN; + + ATF_REQUIRE_EQ_MSG(epoll_ctl(epfd, EPOLL_CTL_ADD, -1, &event), -1, + "epoll_ctl 
succeeded unexpectedly"); + ATF_REQUIRE_ERRNO(EBADF, true); +} + +ATF_TC(double_add); +ATF_TC_HEAD(double_add, tc) +{ + + atf_tc_set_md_var(tc, "descr", + "Checks that epoll_ctl detects if a fd has already been added"); +} +ATF_TC_BODY(double_add, tc) +{ + int epfd, fd; + struct epoll_event event; + + RL(epfd = epoll_create1(0)); + RL(fd = epoll_create1(0)); + event.events = EPOLLIN; + + RL(epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &event)); + + ATF_REQUIRE_EQ_MSG(epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &event), -1, + "epoll_ctl succeeded unexpectedly"); + ATF_REQUIRE_ERRNO(EEXIST, true); +} + +ATF_TC(not_added); +ATF_TC_HEAD(not_added, tc) +{ + + atf_tc_set_md_var(tc, "descr", + "Checks that epoll_ctl detects if a fd has not been added"); +} +ATF_TC_BODY(not_added, tc) +{ + int epfd, fd; + struct epoll_event event; + + RL(epfd = epoll_create1(0)); + RL(fd = epoll_create1(0)); + event.events = EPOLLIN; + + ATF_REQUIRE_EQ_MSG(epoll_ctl(epfd, EPOLL_CTL_MOD, fd, &event), -1, + "epoll_ctl succeeded unexpectedly"); + ATF_REQUIRE_ERRNO(ENOENT, true); + + ATF_REQUIRE_EQ_MSG(epoll_ctl(epfd, EPOLL_CTL_DEL, fd, NULL), -1, + "epoll_ctl succeeded unexpectedly"); + ATF_REQUIRE_ERRNO(ENOENT, true); +} + +ATF_TC(watching_self); +ATF_TC_HEAD(watching_self, tc) +{ + + atf_tc_set_md_var(tc, "descr", + "Checks that epoll disallows watching itself"); +} +ATF_TC_BODY(watching_self, tc) +{ + int epfd; + struct epoll_event event; + + RL(epfd = epoll_create1(0)); + ATF_REQUIRE_EQ_MSG(epoll_ctl(epfd, EPOLL_CTL_ADD, epfd, &event), -1, + "epoll_ctl succeeded unexpectedly"); + ATF_REQUIRE_ERRNO(EINVAL, true); +} + +ATF_TC(watch_loops); +ATF_TC_HEAD(watch_loops, tc) +{ + + atf_tc_set_md_var(tc, "descr", "Checks that epoll disallows loops"); +} +ATF_TC_BODY(watch_loops, tc) +{ + int epfd1, epfd2; + struct epoll_event event; + + event.events = EPOLLIN; + RL(epfd1 = epoll_create1(0)); + RL(epfd2 = epoll_create1(0)); + RL(epoll_ctl(epfd1, EPOLL_CTL_ADD, epfd2, &event)); + ATF_REQUIRE_EQ_MSG(epoll_ctl(epfd2, 
EPOLL_CTL_ADD, epfd1, &event), -1, + "epoll_ctl succeeded unexpectedly"); + ATF_REQUIRE_ERRNO(ELOOP, true); +} + +ATF_TC(watch_depth); +ATF_TC_HEAD(watch_depth, tc) +{ + + atf_tc_set_md_var(tc, "descr", + "Checks that epoll fails when the watch depth exceeds 5"); +} +ATF_TC_BODY(watch_depth, tc) +{ + int epfd, tmp; + struct epoll_event event; + + event.events = EPOLLIN; + RL(epfd = epoll_create1(0)); + for (size_t i = 0; i < 4; i++) { + RL(tmp = epoll_create1(0)); + RL(epoll_ctl(tmp, EPOLL_CTL_ADD, epfd, &event)); + epfd = tmp; + } + RL(tmp = epoll_create1(0)); + ATF_REQUIRE_EQ_MSG(epoll_ctl(tmp, EPOLL_CTL_ADD, epfd, &event), -1, + "epoll_ctl succeeded unexpectedly"); + ATF_REQUIRE_ERRNO(EINVAL, true); +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, create_size); + ATF_TP_ADD_TC(tp, bad_epfd); + ATF_TP_ADD_TC(tp, bad_fd); + ATF_TP_ADD_TC(tp, not_added); + ATF_TP_ADD_TC(tp, watching_self); + ATF_TP_ADD_TC(tp, watch_loops); + ATF_TP_ADD_TC(tp, watch_depth); + + return atf_no_error(); +}