Module Name:    src
Committed By:   thorpej
Date:           Sun Sep 19 15:51:28 UTC 2021

Modified Files:
        src/distrib/sets/lists/base: shl.mi
        src/distrib/sets/lists/comp: mi
        src/distrib/sets/lists/debug: mi shl.mi
        src/distrib/sets/lists/tests: mi
        src/lib/libc: shlib_version
        src/lib/libc/sys: Makefile.inc
        src/sys/kern: files.kern syscalls.master
        src/sys/sys: Makefile file.h
        src/tests/lib/libc/sys: Makefile
Added Files:
        src/lib/libc/sys: eventfd.2 eventfd_read.c eventfd_write.c timerfd.2
        src/sys/kern: sys_eventfd.c sys_timerfd.c
        src/sys/sys: eventfd.h timerfd.h
        src/tests/lib/libc/sys: t_eventfd.c t_timerfd.c

Log Message:
Add native implementations of eventfd(2) and timerfd(2), compatible with
the Linux interfaces of the same name.


To generate a diff of this commit:
cvs rdiff -u -r1.925 -r1.926 src/distrib/sets/lists/base/shl.mi
cvs rdiff -u -r1.2392 -r1.2393 src/distrib/sets/lists/comp/mi
cvs rdiff -u -r1.360 -r1.361 src/distrib/sets/lists/debug/mi
cvs rdiff -u -r1.282 -r1.283 src/distrib/sets/lists/debug/shl.mi
cvs rdiff -u -r1.1125 -r1.1126 src/distrib/sets/lists/tests/mi
cvs rdiff -u -r1.291 -r1.292 src/lib/libc/shlib_version
cvs rdiff -u -r1.245 -r1.246 src/lib/libc/sys/Makefile.inc
cvs rdiff -u -r0 -r1.1 src/lib/libc/sys/eventfd.2 \
    src/lib/libc/sys/eventfd_read.c src/lib/libc/sys/eventfd_write.c \
    src/lib/libc/sys/timerfd.2
cvs rdiff -u -r1.56 -r1.57 src/sys/kern/files.kern
cvs rdiff -u -r0 -r1.2 src/sys/kern/sys_eventfd.c src/sys/kern/sys_timerfd.c
cvs rdiff -u -r1.307 -r1.308 src/sys/kern/syscalls.master
cvs rdiff -u -r1.177 -r1.178 src/sys/sys/Makefile
cvs rdiff -u -r0 -r1.2 src/sys/sys/eventfd.h src/sys/sys/timerfd.h
cvs rdiff -u -r1.87 -r1.88 src/sys/sys/file.h
cvs rdiff -u -r1.68 -r1.69 src/tests/lib/libc/sys/Makefile
cvs rdiff -u -r0 -r1.2 src/tests/lib/libc/sys/t_eventfd.c \
    src/tests/lib/libc/sys/t_timerfd.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/distrib/sets/lists/base/shl.mi
diff -u src/distrib/sets/lists/base/shl.mi:1.925 src/distrib/sets/lists/base/shl.mi:1.926
--- src/distrib/sets/lists/base/shl.mi:1.925	Thu Sep  2 11:28:45 2021
+++ src/distrib/sets/lists/base/shl.mi	Sun Sep 19 15:51:27 2021
@@ -1,4 +1,4 @@
-# $NetBSD: shl.mi,v 1.925 2021/09/02 11:28:45 christos Exp $
+# $NetBSD: shl.mi,v 1.926 2021/09/19 15:51:27 thorpej Exp $
 #
 # Note:	Don't delete entries from here - mark them as "obsolete" instead,
 #	unless otherwise stated below.
@@ -24,7 +24,7 @@
 ./lib/libblocklist.so.0.0			base-sys-shlib		dynamicroot
 ./lib/libc.so					base-sys-shlib		dynamicroot
 ./lib/libc.so.12				base-sys-shlib		dynamicroot
-./lib/libc.so.12.218				base-sys-shlib		dynamicroot
+./lib/libc.so.12.219				base-sys-shlib		dynamicroot
 ./lib/libcrypt.so				base-sys-shlib		dynamicroot
 ./lib/libcrypt.so.1				base-sys-shlib		dynamicroot
 ./lib/libcrypt.so.1.0				base-sys-shlib		dynamicroot
@@ -251,7 +251,7 @@
 ./usr/lib/libc++.so.1.0				base-sys-shlib		compatfile,libcxx
 ./usr/lib/libc.so				base-sys-shlib		compatfile
 ./usr/lib/libc.so.12				base-sys-shlib		compatfile
-./usr/lib/libc.so.12.218			base-sys-shlib		compatfile
+./usr/lib/libc.so.12.219			base-sys-shlib		compatfile
 ./usr/lib/libcbor.so				base-sys-shlib		compatfile
 ./usr/lib/libcbor.so.0				base-sys-shlib		compatfile
 ./usr/lib/libcbor.so.0.5			base-sys-shlib		compatfile

Index: src/distrib/sets/lists/comp/mi
diff -u src/distrib/sets/lists/comp/mi:1.2392 src/distrib/sets/lists/comp/mi:1.2393
--- src/distrib/sets/lists/comp/mi:1.2392	Thu Sep 16 23:32:49 2021
+++ src/distrib/sets/lists/comp/mi	Sun Sep 19 15:51:27 2021
@@ -1,4 +1,4 @@
-#	$NetBSD: mi,v 1.2392 2021/09/16 23:32:49 christos Exp $
+#	$NetBSD: mi,v 1.2393 2021/09/19 15:51:27 thorpej Exp $
 #
 # Note: don't delete entries from here - mark them as "obsolete" instead.
 ./etc/mtree/set.comp				comp-sys-root
@@ -3279,6 +3279,7 @@
 ./usr/include/sys/errno.h			comp-c-include
 ./usr/include/sys/evcnt.h			comp-c-include
 ./usr/include/sys/event.h			comp-c-include
+./usr/include/sys/eventfd.h			comp-c-include
 ./usr/include/sys/exec.h			comp-c-include
 ./usr/include/sys/exec_aout.h			comp-c-include
 ./usr/include/sys/exec_coff.h			comp-c-include
@@ -3430,6 +3431,7 @@
 ./usr/include/sys/time.h			comp-c-include
 ./usr/include/sys/timeb.h			comp-c-include
 ./usr/include/sys/timepps.h			comp-c-include
+./usr/include/sys/timerfd.h			comp-c-include
 ./usr/include/sys/times.h			comp-c-include
 ./usr/include/sys/timespec.h			comp-c-include
 ./usr/include/sys/timex.h			comp-c-include
@@ -4659,6 +4661,9 @@
 ./usr/share/man/cat2/dup2.0			comp-c-catman		.cat
 ./usr/share/man/cat2/dup3.0			comp-c-catman		.cat
 ./usr/share/man/cat2/errno.0			comp-c-catman		.cat
+./usr/share/man/cat2/eventfd.0			comp-c-catman		.cat
+./usr/share/man/cat2/eventfd_read.0		comp-c-catman		.cat
+./usr/share/man/cat2/eventfd_write.0		comp-c-catman		.cat
 ./usr/share/man/cat2/execve.0			comp-c-catman		.cat
 ./usr/share/man/cat2/extattr_delete_fd.0	comp-c-catman		.cat
 ./usr/share/man/cat2/extattr_delete_file.0	comp-c-catman		.cat
@@ -4897,6 +4902,10 @@
 ./usr/share/man/cat2/timer_getoverrun.0		comp-c-catman		.cat
 ./usr/share/man/cat2/timer_gettime.0		comp-c-catman		.cat
 ./usr/share/man/cat2/timer_settime.0		comp-c-catman		.cat
+./usr/share/man/cat2/timerfd.0			comp-c-catman		.cat
+./usr/share/man/cat2/timerfd_create.0		comp-c-catman		.cat
+./usr/share/man/cat2/timerfd_gettime.0		comp-c-catman		.cat
+./usr/share/man/cat2/timerfd_settime.0		comp-c-catman		.cat
 ./usr/share/man/cat2/truncate.0			comp-c-catman		.cat
 ./usr/share/man/cat2/ucontext.0			comp-c-catman		.cat
 ./usr/share/man/cat2/umask.0			comp-c-catman		.cat
@@ -12994,6 +13003,9 @@
 ./usr/share/man/html2/dup2.html			comp-c-htmlman		html
 ./usr/share/man/html2/dup3.html			comp-c-htmlman		html
 ./usr/share/man/html2/errno.html		comp-c-htmlman		html
+./usr/share/man/html2/eventfd.html		comp-c-htmlman		html
+./usr/share/man/html2/eventfd_read.html		comp-c-htmlman		html
+./usr/share/man/html2/eventfd_write.html	comp-c-htmlman		html
 ./usr/share/man/html2/execve.html		comp-c-htmlman		html
 ./usr/share/man/html2/extattr_delete_fd.html	comp-c-htmlman		html
 ./usr/share/man/html2/extattr_delete_file.html	comp-c-htmlman		html
@@ -13218,6 +13230,10 @@
 ./usr/share/man/html2/timer_getoverrun.html	comp-c-htmlman		html
 ./usr/share/man/html2/timer_gettime.html	comp-c-htmlman		html
 ./usr/share/man/html2/timer_settime.html	comp-c-htmlman		html
+./usr/share/man/html2/timerfd.html		comp-c-htmlman		html
+./usr/share/man/html2/timerfd_create.html	comp-c-htmlman		html
+./usr/share/man/html2/timerfd_gettime.html	comp-c-htmlman		html
+./usr/share/man/html2/timerfd_settime.html	comp-c-htmlman		html
 ./usr/share/man/html2/truncate.html		comp-c-htmlman		html
 ./usr/share/man/html2/ucontext.html		comp-c-htmlman		html
 ./usr/share/man/html2/umask.html		comp-c-htmlman		html
@@ -21146,6 +21162,9 @@
 ./usr/share/man/man2/dup2.2			comp-c-man		.man
 ./usr/share/man/man2/dup3.2			comp-c-man		.man
 ./usr/share/man/man2/errno.2			comp-c-man		.man
+./usr/share/man/man2/eventfd.2			comp-c-man		.man
+./usr/share/man/man2/eventfd_read.2		comp-c-man		.man
+./usr/share/man/man2/eventfd_write.2		comp-c-man		.man
 ./usr/share/man/man2/execve.2			comp-c-man		.man
 ./usr/share/man/man2/extattr_delete_fd.2	comp-c-man		.man
 ./usr/share/man/man2/extattr_delete_file.2	comp-c-man		.man
@@ -21382,6 +21401,10 @@
 ./usr/share/man/man2/timer_getoverrun.2		comp-c-man		.man
 ./usr/share/man/man2/timer_gettime.2		comp-c-man		.man
 ./usr/share/man/man2/timer_settime.2		comp-c-man		.man
+./usr/share/man/man2/timerfd.2			comp-c-man		.man
+./usr/share/man/man2/timerfd_create.2		comp-c-man		.man
+./usr/share/man/man2/timerfd_gettime.2		comp-c-man		.man
+./usr/share/man/man2/timerfd_settime.2		comp-c-man		.man
 ./usr/share/man/man2/truncate.2			comp-c-man		.man
 ./usr/share/man/man2/ucontext.2			comp-c-man		.man
 ./usr/share/man/man2/umask.2			comp-c-man		.man

Index: src/distrib/sets/lists/debug/mi
diff -u src/distrib/sets/lists/debug/mi:1.360 src/distrib/sets/lists/debug/mi:1.361
--- src/distrib/sets/lists/debug/mi:1.360	Sun Aug 29 09:54:18 2021
+++ src/distrib/sets/lists/debug/mi	Sun Sep 19 15:51:28 2021
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.360 2021/08/29 09:54:18 christos Exp $
+# $NetBSD: mi,v 1.361 2021/09/19 15:51:28 thorpej Exp $
 ./etc/mtree/set.debug                           comp-sys-root
 ./usr/lib					comp-sys-usr		compatdir
 ./usr/lib/i18n/libBIG5_g.a			comp-c-debuglib		debuglib,compatfile
@@ -2157,6 +2157,7 @@
 ./usr/libdata/debug/usr/tests/lib/libc/sys/t_connect.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/sys/t_context.debug		tests-obsolete		obsolete,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/sys/t_dup.debug			tests-lib-debug		debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/sys/t_eventfd.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/sys/t_fork.debug			tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/sys/t_fsync.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/sys/t_futex_ops.debug           	tests-lib-tests         debug,atf,compattestfile
@@ -2224,6 +2225,7 @@
 ./usr/libdata/debug/usr/tests/lib/libc/sys/t_swapcontext.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/sys/t_syscall.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/sys/t_timer_create.debug		tests-lib-debug		debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/sys/t_timerfd.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/sys/t_truncate.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/sys/t_ucontext.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/sys/t_umask.debug		tests-lib-debug		debug,atf,compattestfile

Index: src/distrib/sets/lists/debug/shl.mi
diff -u src/distrib/sets/lists/debug/shl.mi:1.282 src/distrib/sets/lists/debug/shl.mi:1.283
--- src/distrib/sets/lists/debug/shl.mi:1.282	Thu Sep  2 11:28:45 2021
+++ src/distrib/sets/lists/debug/shl.mi	Sun Sep 19 15:51:28 2021
@@ -1,10 +1,10 @@
-# $NetBSD: shl.mi,v 1.282 2021/09/02 11:28:45 christos Exp $
+# $NetBSD: shl.mi,v 1.283 2021/09/19 15:51:28 thorpej Exp $
 ./usr/lib/libbfd_g.a						comp-c-debuglib	debuglib,compatfile,binutils
 ./usr/libdata/debug/lib						base-sys-usr	debug,dynamicroot,compatdir
 ./usr/libdata/debug/lib/libavl.so.0.0.debug			comp-zfs-debug	debug,dynamicroot,zfs
 ./usr/libdata/debug/lib/libblacklist.so.0.0.debug		comp-obsolete	obsolete,compatfile
 ./usr/libdata/debug/lib/libblocklist.so.0.0.debug		comp-sys-debug	debug,dynamicroot
-./usr/libdata/debug/lib/libc.so.12.218.debug			comp-sys-debug	debug,dynamicroot
+./usr/libdata/debug/lib/libc.so.12.219.debug			comp-sys-debug	debug,dynamicroot
 ./usr/libdata/debug/lib/libcrypt.so.1.0.debug			comp-sys-debug	debug,dynamicroot
 ./usr/libdata/debug/lib/libcrypto.so.12.0.debug			comp-sys-debug	debug,dynamicroot,openssl=10
 ./usr/libdata/debug/lib/libcrypto.so.14.0.debug			comp-sys-debug	debug,dynamicroot,openssl=11
@@ -84,7 +84,7 @@
 ./usr/libdata/debug/usr/lib/libbsdmalloc.so.0.0.debug		comp-sys-debug	debug,compatfile
 ./usr/libdata/debug/usr/lib/libbz2.so.1.1.debug			comp-sys-debug	debug,compatfile
 ./usr/libdata/debug/usr/lib/libc++.so.1.0.debug			comp-sys-debug	debug,compatfile,libcxx
-./usr/libdata/debug/usr/lib/libc.so.12.218.debug		comp-sys-debug	debug,compatfile
+./usr/libdata/debug/usr/lib/libc.so.12.219.debug		comp-sys-debug	debug,compatfile
 ./usr/libdata/debug/usr/lib/libcbor.so.0.5.debug		comp-sys-debug	debug,compatfile
 ./usr/libdata/debug/usr/lib/libcom_err.so.8.0.debug		comp-krb5-debug	debug,compatfile,kerberos
 ./usr/libdata/debug/usr/lib/libcrypt.so.1.0.debug		comp-sys-debug	debug,compatfile

Index: src/distrib/sets/lists/tests/mi
diff -u src/distrib/sets/lists/tests/mi:1.1125 src/distrib/sets/lists/tests/mi:1.1126
--- src/distrib/sets/lists/tests/mi:1.1125	Mon Sep 13 22:09:06 2021
+++ src/distrib/sets/lists/tests/mi	Sun Sep 19 15:51:28 2021
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.1125 2021/09/13 22:09:06 rillig Exp $
+# $NetBSD: mi,v 1.1126 2021/09/19 15:51:28 thorpej Exp $
 #
 # Note: don't delete entries from here - mark them as "obsolete" instead.
 #
@@ -3145,6 +3145,7 @@
 ./usr/tests/lib/libc/sys/t_connect			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/sys/t_context			tests-obsolete		obsolete
 ./usr/tests/lib/libc/sys/t_dup				tests-lib-tests		compattestfile,atf
+./usr/tests/lib/libc/sys/t_eventfd			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/sys/t_fork				tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/sys/t_fsync			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/sys/t_futex_ops			tests-lib-tests		compattestfile,atf
@@ -3212,6 +3213,7 @@
 ./usr/tests/lib/libc/sys/t_swapcontext			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/sys/t_syscall			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/sys/t_timer_create			tests-lib-tests		compattestfile,atf
+./usr/tests/lib/libc/sys/t_timerfd			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/sys/t_truncate			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/sys/t_ucontext			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/sys/t_umask			tests-lib-tests		compattestfile,atf

Index: src/lib/libc/shlib_version
diff -u src/lib/libc/shlib_version:1.291 src/lib/libc/shlib_version:1.292
--- src/lib/libc/shlib_version:1.291	Fri Oct  9 18:38:48 2020
+++ src/lib/libc/shlib_version	Sun Sep 19 15:51:28 2021
@@ -1,4 +1,4 @@
-#	$NetBSD: shlib_version,v 1.291 2020/10/09 18:38:48 christos Exp $
+#	$NetBSD: shlib_version,v 1.292 2021/09/19 15:51:28 thorpej Exp $
 #	Remember to update distrib/sets/lists/base/shl.* when changing
 #
 # things we wish to do on next major version bump:
@@ -54,4 +54,4 @@
 # - the syscall stubs for the (obsolete) lfs syscalls should be removed
 # - remove tzsetwall(3), upstream has removed it
 major=12
-minor=218
+minor=219

Index: src/lib/libc/sys/Makefile.inc
diff -u src/lib/libc/sys/Makefile.inc:1.245 src/lib/libc/sys/Makefile.inc:1.246
--- src/lib/libc/sys/Makefile.inc:1.245	Fri Aug 14 00:53:16 2020
+++ src/lib/libc/sys/Makefile.inc	Sun Sep 19 15:51:28 2021
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile.inc,v 1.245 2020/08/14 00:53:16 riastradh Exp $
+#	$NetBSD: Makefile.inc,v 1.246 2021/09/19 15:51:28 thorpej Exp $
 #	@(#)Makefile.inc	8.3 (Berkeley) 10/24/94
 
 # sys sources
@@ -7,9 +7,10 @@
 # other sources shared with the kernel, used in syscalls
 SRCS+=	cpuset.c
 # glue to offer userland wrappers for some syscalls
-SRCS+=	accept4.c clock_getcpuclockid.c posix_fadvise.c posix_madvise.c \
-	ppoll.c sched.c sigqueue.c sigtimedwait.c sigwait.c sigwaitinfo.c \
-	statvfs.c swapon.c semctl.c vadvise.c
+SRCS+=	accept4.c clock_getcpuclockid.c eventfd_read.c eventfd_write.c \
+	posix_fadvise.c posix_madvise.c ppoll.c sched.c sigqueue.c \
+	sigtimedwait.c sigwait.c sigwaitinfo.c statvfs.c swapon.c semctl.c \
+	vadvise.c
 
 .if ${RUMPRUN} != "yes"
 # modules with non-default implementations on at least one architecture:
@@ -100,6 +101,7 @@ ASM=\
 		clock_getcpuclockid2.S \
 		__clock_getres50.S __clock_gettime50.S \
 	dup.S dup2.S dup3.S \
+	eventfd.S \
 	extattrctl.S \
 		extattr_delete_fd.S extattr_delete_file.S \
 		extattr_delete_link.S extattr_get_fd.S extattr_get_file.S \
@@ -154,6 +156,7 @@ ASM=\
 		__statvfs190.S swapctl.S symlink.S symlinkat.S __sysctl.S \
 	timer_create.S timer_delete.S __timer_gettime50.S timer_getoverrun.S \
 		____semctl50.S __timer_settime50.S \
+	timerfd_create.S timerfd_gettime.S timerfd_settime.S \
 	umask.S undelete.S unlink.S unlinkat.S unmount.S __utimes50.S \
 		utimensat.S utrace.S uuidgen.S
 
@@ -251,7 +254,7 @@ LintSysPseudoNoerr.c: ${LIBCDIR}/sys/mak
 MAN+=	accept.2 access.2 acct.2 adjtime.2 bind.2 brk.2 chdir.2 \
 	chflags.2 chmod.2 chown.2 chroot.2 clock_getcpuclockid2.2 \
 	clock_settime.2 clone.2 close.2 \
-	connect.2 dup.2 execve.2 _exit.2 extattr_get_file.2 \
+	connect.2 dup.2 eventfd.2 execve.2 _exit.2 extattr_get_file.2 \
 	fcntl.2 fdatasync.2 fdiscard.2 fhopen.2 \
 	flock.2 fork.2 fsync.2 getcontext.2 getdents.2 \
 	getfh.2 getvfsstat.2 getgid.2 getgroups.2 \
@@ -284,7 +287,7 @@ MAN+=	accept.2 access.2 acct.2 adjtime.2
 	socket.2 \
 	socketpair.2 stat.2 statvfs.2 swapctl.2 swapon.3 symlink.2 \
 	sync.2 sysarch.2 syscall.2 timer_create.2 timer_delete.2 \
-	timer_settime.2 truncate.2 umask.2 undelete.2 \
+	timer_settime.2 timerfd.2 truncate.2 umask.2 undelete.2 \
 	unlink.2 utimes.2 utrace.2 uuidgen.2 vfork.2 wait.2 write.2
 
 MLINKS+=_exit.2 _Exit.2
@@ -299,6 +302,8 @@ MLINKS+=chown.2 fchown.2 chown.2 lchown.
 MLINKS+=chroot.2 fchroot.2
 MLINKS+=clock_settime.2 clock_gettime.2
 MLINKS+=clock_settime.2 clock_getres.2
+MLINKS+=eventfd.2 eventfd_read.2 \
+	eventfd.2 eventfd_write.2
 MLINKS+=extattr_get_file.2 extattr_set_file.2 \
 	extattr_get_file.2 extattr_delete_file.2 \
 	extattr_get_file.2 extattr_list_file.2 \
@@ -380,6 +385,9 @@ MLINKS+=statvfs.2 fstatvfs.2
 MLINKS+=statvfs.2 statvfs1.2
 MLINKS+=statvfs.2 fstatvfs1.2
 MLINKS+=syscall.2 __syscall.2
+MLINKS+=timerfd.2 timerfd_create.2 \
+	timerfd.2 timerfd_gettime.2 \
+	timerfd.2 timerfd_settime.2
 MLINKS+=truncate.2 ftruncate.2
 MLINKS+=unlink.2 unlinkat.2
 MLINKS+=utimes.2 futimes.2 utimes.2 lutimes.2

Index: src/sys/kern/files.kern
diff -u src/sys/kern/files.kern:1.56 src/sys/kern/files.kern:1.57
--- src/sys/kern/files.kern:1.56	Tue May 18 05:16:09 2021
+++ src/sys/kern/files.kern	Sun Sep 19 15:51:27 2021
@@ -1,4 +1,4 @@
-#	$NetBSD: files.kern,v 1.56 2021/05/18 05:16:09 mrg Exp $
+#	$NetBSD: files.kern,v 1.57 2021/09/19 15:51:27 thorpej Exp $
 
 #
 # kernel sources
@@ -158,6 +158,7 @@ file	kern/subr_workqueue.c		kern
 file	kern/subr_xcall.c		kern
 file	kern/sys_aio.c			aio
 file	kern/sys_descrip.c		kern
+file	kern/sys_eventfd.c		kern
 file	kern/sys_futex.c		kern
 file	kern/sys_generic.c		kern
 file	kern/sys_getrandom.c		kern
@@ -174,6 +175,7 @@ file	kern/sys_select.c		kern
 file	kern/sys_sig.c			kern
 file	kern/sys_sched.c		kern
 file	kern/sys_socket.c		kern
+file	kern/sys_timerfd.c		kern
 file	kern/syscalls.c			syscall_debug | kdtrace_hooks
 file	kern/sysv_ipc.c			sysvshm | sysvsem | sysvmsg
 file	kern/sysv_msg.c			sysvmsg

Index: src/sys/kern/syscalls.master
diff -u src/sys/kern/syscalls.master:1.307 src/sys/kern/syscalls.master:1.308
--- src/sys/kern/syscalls.master:1.307	Mon Nov  2 18:55:12 2020
+++ src/sys/kern/syscalls.master	Sun Sep 19 15:51:27 2021
@@ -1,4 +1,4 @@
-	$NetBSD: syscalls.master,v 1.307 2020/11/02 18:55:12 christos Exp $
+	$NetBSD: syscalls.master,v 1.308 2021/09/19 15:51:27 thorpej Exp $
 
 ;	@(#)syscalls.master	8.2 (Berkeley) 1/13/94
 
@@ -398,9 +398,13 @@
 #else
 176	EXCL		ntp_adjtime
 #endif
-177	UNIMPL
-178	UNIMPL
-179	UNIMPL
+177	STD		{ int|sys||timerfd_create(clockid_t clock_id, \
+			    int flags); }
+178	STD		{ int|sys||timerfd_settime(int fd, int flags, \
+			    const struct itimerspec *new_value, \
+			    struct itimerspec *old_value); }
+179	STD		{ int|sys||timerfd_gettime(int fd, \
+			    struct itimerspec *curr_value); }
 180	UNIMPL
 
 ; Syscalls 180-199 are used by/reserved for BSD
@@ -565,7 +569,7 @@
 		{ ssize_t|sys||mq_timedreceive(mqd_t mqdes, \
 		    char *msg_ptr, size_t msg_len, unsigned *msg_prio, \
 		    const struct timespec50 *abs_timeout); }
-267	UNIMPL
+267	STD		{ int|sys||eventfd(unsigned int val, int flags); }
 268	UNIMPL
 269	UNIMPL
 270	STD	RUMP	{ int|sys||__posix_rename(const char *from, \

Index: src/sys/sys/Makefile
diff -u src/sys/sys/Makefile:1.177 src/sys/sys/Makefile:1.178
--- src/sys/sys/Makefile:1.177	Wed Sep 15 17:33:08 2021
+++ src/sys/sys/Makefile	Sun Sep 19 15:51:27 2021
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile,v 1.177 2021/09/15 17:33:08 thorpej Exp $
+#	$NetBSD: Makefile,v 1.178 2021/09/19 15:51:27 thorpej Exp $
 
 .include <bsd.own.mk>
 
@@ -18,7 +18,7 @@ INCS=	acct.h acl.h agpio.h aio.h ansi.h 
 	dir.h dirent.h \
 	disk.h disklabel.h disklabel_acorn.h disklabel_gpt.h disklabel_rdb.h \
 	dkbad.h dkio.h dkstat.h domain.h drvctlio.h dvdio.h \
-	endian.h envsys.h errno.h evcnt.h event.h exec.h exec_aout.h \
+	endian.h envsys.h errno.h evcnt.h event.h eventfd.h exec.h exec_aout.h \
 	exec_coff.h exec_ecoff.h exec_elf.h exec_script.h extattr.h extent.h \
 	fault.h \
 	fcntl.h fd_set.h fdio.h featuretest.h file.h filedesc.h filio.h \
@@ -42,7 +42,7 @@ INCS=	acct.h acl.h agpio.h aio.h ansi.h 
 	socketvar.h sockio.h spawn.h specificdata.h stat.h \
 	statvfs.h syscall.h syscallargs.h sysctl.h stdarg.h stdbool.h \
 	stdint.h swap.h syncobj.h syslimits.h syslog.h \
-	tape.h termios.h time.h timeb.h timepps.h times.h timespec.h \
+	tape.h termios.h time.h timeb.h timepps.h timerfd.h times.h timespec.h \
 	timex.h tls.h trace.h tree.h tty.h ttychars.h ttycom.h \
 	ttydefaults.h ttydev.h types.h \
 	ucontext.h ucred.h uio.h un.h unistd.h unpcb.h utsname.h uuid.h \

Index: src/sys/sys/file.h
diff -u src/sys/sys/file.h:1.87 src/sys/sys/file.h:1.88
--- src/sys/sys/file.h:1.87	Sat Sep 11 10:08:55 2021
+++ src/sys/sys/file.h	Sun Sep 19 15:51:27 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: file.h,v 1.87 2021/09/11 10:08:55 riastradh Exp $	*/
+/*	$NetBSD: file.h,v 1.88 2021/09/19 15:51:27 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -102,6 +102,8 @@ union file_data {
 	struct socket *fd_so;		// DTYPE_SOCKET
 	struct pipe *fd_pipe;		// DTYPE_PIPE
 	struct kqueue *fd_kq;		// DTYPE_KQUEUE
+	struct eventfd *fd_eventfd;	// DTYPE_EVENTFD
+	struct timerfd *fd_timerfd;	// DTYPE_TIMERFD
 	void *fd_data;			// DTYPE_MISC
 	struct audio_file *fd_audioctx;	// DTYPE_MISC (audio)
 	struct pad_softc *fd_pad;	// DTYPE_MISC (pad)
@@ -148,6 +150,8 @@ struct file {
 #define f_data		f_undata.fd_data
 #define f_mqueue	f_undata.fd_mq
 #define f_ksem		f_undata.fd_ks
+#define f_eventfd	f_undata.fd_eventfd
+#define f_timerfd	f_undata.fd_timerfd
 
 #define f_rndctx	f_undata.fd_rndctx
 #define f_audioctx	f_undata.fd_audioctx
@@ -170,10 +174,12 @@ struct file {
 #define	DTYPE_CRYPTO	6		/* crypto */
 #define	DTYPE_MQUEUE	7		/* message queue */
 #define	DTYPE_SEM	8		/* semaphore */
+#define	DTYPE_EVENTFD	9		/* eventfd */
+#define	DTYPE_TIMERFD	10		/* timerfd */
 
 #define DTYPE_NAMES	\
     "0", "file", "socket", "pipe", "kqueue", "misc", "crypto", "mqueue", \
-    "semaphore"
+    "semaphore", "eventfd", "timerfd"
 
 #ifdef _KERNEL
 

Index: src/tests/lib/libc/sys/Makefile
diff -u src/tests/lib/libc/sys/Makefile:1.68 src/tests/lib/libc/sys/Makefile:1.69
--- src/tests/lib/libc/sys/Makefile:1.68	Sun Sep  6 07:20:31 2020
+++ src/tests/lib/libc/sys/Makefile	Sun Sep 19 15:51:28 2021
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.68 2020/09/06 07:20:31 mrg Exp $
+# $NetBSD: Makefile,v 1.69 2021/09/19 15:51:28 thorpej Exp $
 
 MKMAN=	no
 
@@ -16,6 +16,7 @@ TESTS_C+=		t_clock_nanosleep
 TESTS_C+=		t_clone
 TESTS_C+=		t_connect
 TESTS_C+=		t_dup
+TESTS_C+=		t_eventfd
 TESTS_C+=		t_fork
 TESTS_C+=		t_fsync
 TESTS_C+=		t_futex_ops
@@ -81,6 +82,7 @@ TESTS_C+=		t_swapcontext
 TESTS_C+=		t_stat
 TESTS_C+=		t_syscall
 TESTS_C+=		t_timer_create
+TESTS_C+=		t_timerfd
 TESTS_C+=		t_truncate
 TESTS_C+=		t_ucontext
 TESTS_C+=		t_umask
@@ -93,7 +95,9 @@ TESTS_C+=		t_write
 
 SRCS.t_mprotect=	t_mprotect.c ${SRCS_EXEC_PROT} t_mprotect_helper.c
 
-LDADD.t_getpid+=        -lpthread
+LDADD.t_eventfd+=	-lpthread
+LDADD.t_getpid+=	-lpthread
+LDADD.t_timerfd+=	-lpthread
 
 LDADD.t_ptrace_sigchld+=	-pthread -lm
 

Added files:

Index: src/lib/libc/sys/eventfd.2
diff -u /dev/null src/lib/libc/sys/eventfd.2:1.1
--- /dev/null	Sun Sep 19 15:51:29 2021
+++ src/lib/libc/sys/eventfd.2	Sun Sep 19 15:51:28 2021
@@ -0,0 +1,271 @@
+.\"     $NetBSD: eventfd.2,v 1.1 2021/09/19 15:51:28 thorpej Exp $
+.\"
+.\" Copyright (c) 2021 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to The NetBSD Foundation
+.\" by Jason R. Thorpe.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd September 17, 2021
+.Dt EVENTFD 2
+.Os
+.Sh NAME
+.Nm eventfd ,
+.Nm eventfd_read ,
+.Nm eventfd_write
+.Nd create and interact with a counting event descriptor
+.Sh SYNOPSIS
+.In sys/eventfd.h
+.Ft int
+.Fn eventfd "unsigned int val" "int flags"
+.Ft int
+.Fn eventfd_read "int efd" "eventfd_t *valp"
+.Ft int
+.Fn eventfd_write "int efd" "eventfd_t val"
+.Sh DESCRIPTION
+The
+.Nm
+interface presents a simple counting object associated with a file descriptor.
+Writes and reads to this file descriptor increment and decrement the count,
+respectively.
+When the object's value is non-zero, the file descriptor is considered
+.Dq readable ,
+and when the count is less than the maximum value
+.Po
+.Dv UINT64_MAX
+- 1
+.Pc
+it is considered
+.Dq writable .
+When an
+.Nm
+object is no longer needed, it may be disposed of using
+.Xr close 2 .
+.Pp
+All I/O to an
+.Nm
+object is 8 bytes in length, which is the space required to store an
+unsigned 64-bit integer.
+Any read or write with a buffer smaller than 8 bytes will fail with
+.Er EINVAL .
+Only the first 8 bytes of the buffer will be used.
+.Pp
+The
+.Fn eventfd
+function creates a new counting event object and returns a file descriptor
+representing that object.
+The initial value of the object is specified by the
+.Fa val
+argument.
+The following flags define the behavior of the resulting object:
+.Bl -tag -width "EFD_SEMAPHORE"
+.It Dv EFD_CLOEXEC
+This is an alias for the
+.Dv O_CLOEXEC
+flag; see
+.Xr open 2
+for more information.
+.It Dv EFD_NONBLOCK
+This is an alias for the
+.Dv O_NONBLOCK
+flag; see
+.Xr open 2
+for more information.
+.It Dv EFD_SEMAPHORE
+Creates a
+.Dq semaphore mode
+object; see below for details.
+.El
+.Pp
+Reads from an
+.Nm
+object return an unsigned 64-bit integer in the caller's buffer.
+The semantics of this value are dependent on whether the
+.Nm
+object was created in
+.Dq semaphore mode :
+.Bl -bullet
+.It
+If the
+.Nm
+object was created in
+.Dq semaphore mode ,
+reads return the value
+.Dv 1
+and the object's counter is decremented by
+.Dv 1 .
+.It
+If the
+.Nm
+object was not created in
+.Dq semaphore mode ,
+reads return the current value of the object's counter and
+reset the counter to
+.Dv 0 .
+.El
+.Pp
+If the value of the
+.Nm
+object's counter is
+.Dv 0 ,
+then reads will block, unless the
+.Nm
+object is set for non-blocking I/O.
+.Pp
+Writing to an
+.Nm
+object adds the unsigned 64-bit value provided in the caller's buffer
+to the
+.Nm
+object's counter.
+If adding the specified value would exceed the maximum value, then the
+write will block, unless the
+.Nm
+object is set for non-blocking I/O.
+.Pp
+The convenience functions
+.Fn eventfd_read
+and
+.Fn eventfd_write
+are provided to simplify interacting with
+.Nm
+objects, and are simply wrappers around the
+.Xr read 2
+and
+.Xr write 2
+system calls:
+.Bl -tag -width "eventfd_writeXX"
+.It Fn eventfd_read
+Reads the unsigned 64-bit integer value of the
+.Nm
+object and returns it in
+.Fa valp .
+.It Fn eventfd_write
+Writes the unsigned 64-bit integer value
+.Fa val
+to the
+.Nm
+object.
+.El
+.Sh RETURN VALUES
+The
+.Fn eventfd
+system call returns
+.Dv -1
+if an error occurs, otherwise the return value is a descriptor representing the
+.Nm
+object.
+.Pp
+The
+.Fn eventfd_read
+and
+.Fn eventfd_write
+functions return
+.Dv 0
+upon success or
+.Dv -1
+if an error occurs.
+.Sh ERRORS
+The
+.Fn eventfd
+system call fails if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Flags other than
+.Dv EFD_CLOEXEC ,
+.Dv EFD_NONBLOCK ,
+and
+.Dv EFD_SEMAPHORE
+are set in the
+.Fa flags
+argument.
+.It Bq Er EMFILE
+The per-process descriptor table is full.
+.It Bq Er ENFILE
+The system file table is full.
+.El
+.Pp
+The
+.Fn eventfd_read
+function fails if:
+.Bl -tag -width Er
+.It Bq Er EAGAIN
+The value of the
+.Nm
+object is
+.Dv 0
+and the
+.Nm
+object is set for non-blocking I/O.
+.El
+.Pp
+The
+.Fn eventfd_write
+function fails if:
+.Bl -tag -width Er
+.It Bq Er EAGAIN
+The resulting value of the
+.Nm
+object after adding the value
+.Fa val
+would exceed the maximum value
+.Po
+.Dv UINT64_MAX
+- 1
+.Pc
+and the
+.Nm
+object is set for non-blocking I/O.
+.It Bq Er EINVAL
+An attempt was made to write a value greater than the maximum value.
+.El
+.Pp
+In addition to the errors returned by
+.Fn eventfd_read
+and
+.Fn eventfd_write ,
+a read from or write to an
+.Nm
+object fails if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The size of the buffer is less than 8 bytes
+.Pq the size required to hold an unsigned 64-bit integer .
+.El
+.Sh SEE ALSO
+.Xr close 2 ,
+.Xr kevent 2 ,
+.Xr open 2 ,
+.Xr poll 2 ,
+.Xr read 2 ,
+.Xr select 2 ,
+.Xr write 2
+.Sh HISTORY
+The
+.Nm
+interface first appeared in
+.Nx 10 .
+It is compatible with the
+.Nm
+interface that appeared in Linux 2.6.30.
Index: src/lib/libc/sys/eventfd_read.c
diff -u /dev/null src/lib/libc/sys/eventfd_read.c:1.1
--- /dev/null	Sun Sep 19 15:51:29 2021
+++ src/lib/libc/sys/eventfd_read.c	Sun Sep 19 15:51:28 2021
@@ -0,0 +1,60 @@
+/* $NetBSD: eventfd_read.c,v 1.1 2021/09/19 15:51:28 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD: eventfd_read.c,v 1.1 2021/09/19 15:51:28 thorpej Exp $");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+#include <sys/types.h>
+#include <sys/eventfd.h>
+#include <errno.h>
+#include <unistd.h>
+
+/*
+ * eventfd_read:
+ *
+ *	Read the 64-bit counter value from the eventfd descriptor efd
+ *	into *valp.  Returns 0 on success.  Returns -1 if read(2) fails
+ *	(errno as set by read) or if the transfer is not exactly
+ *	sizeof(eventfd_t) bytes (errno set to EIO).
+ */
+int
+eventfd_read(int efd, eventfd_t *valp)
+{
+	eventfd_t counter;
+	const ssize_t nread = read(efd, &counter, sizeof(counter));
+
+	/* read(2) itself failed; errno is already set. */
+	if (nread == -1)
+		return -1;
+
+	/* This should never happen, but... */
+	if (nread != (ssize_t)sizeof(counter)) {
+		errno = EIO;
+		return -1;
+	}
+
+	*valp = counter;
+	return 0;
+}
Index: src/lib/libc/sys/eventfd_write.c
diff -u /dev/null src/lib/libc/sys/eventfd_write.c:1.1
--- /dev/null	Sun Sep 19 15:51:29 2021
+++ src/lib/libc/sys/eventfd_write.c	Sun Sep 19 15:51:28 2021
@@ -0,0 +1,58 @@
+/* $NetBSD: eventfd_write.c,v 1.1 2021/09/19 15:51:28 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD: eventfd_write.c,v 1.1 2021/09/19 15:51:28 thorpej Exp $");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+#include <sys/types.h>
+#include <sys/eventfd.h>
+#include <errno.h>
+#include <unistd.h>
+
+/*
+ * eventfd_write:
+ *
+ *	Write the 64-bit value val to the eventfd descriptor efd.
+ *	Returns 0 on success.  Returns -1 if write(2) fails (errno as
+ *	set by write) or if the transfer is not exactly
+ *	sizeof(eventfd_t) bytes (errno set to EIO).
+ */
+int
+eventfd_write(int efd, eventfd_t val)
+{
+	const ssize_t nwritten = write(efd, &val, sizeof(val));
+
+	/* write(2) itself failed; errno is already set. */
+	if (nwritten == -1)
+		return -1;
+
+	/* This should never happen, but... */
+	if (nwritten != (ssize_t)sizeof(val)) {
+		errno = EIO;
+		return -1;
+	}
+
+	return 0;
+}
Index: src/lib/libc/sys/timerfd.2
diff -u /dev/null src/lib/libc/sys/timerfd.2:1.1
--- /dev/null	Sun Sep 19 15:51:29 2021
+++ src/lib/libc/sys/timerfd.2	Sun Sep 19 15:51:28 2021
@@ -0,0 +1,321 @@
+.\"     $NetBSD: timerfd.2,v 1.1 2021/09/19 15:51:28 thorpej Exp $
+.\"
+.\" Copyright (c) 2021 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to The NetBSD Foundation
+.\" by Jason R. Thorpe.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd September 17, 2021
+.Dt TIMERFD 2
+.Os
+.Sh NAME
+.Nm timerfd ,
+.Nm timerfd_create ,
+.Nm timerfd_gettime ,
+.Nm timerfd_settime
+.Nd create and interact with a timer descriptor
+.Sh SYNOPSIS
+.In sys/timerfd.h
+.Ft int
+.Fn timerfd_create "clockid_t clockid" "int flags"
+.Ft int
+.Fn timerfd_gettime "int fd" "struct itimerspec *tim"
+.Ft int
+.Fn timerfd_settime "int fd" "int flags" \
+"const struct itimerspec *tim" "struct itimerspec *otim"
+.Sh DESCRIPTION
+.Nm
+presents an interface to interval timers associated with a file descriptor.
+These timers are functionally equivalent to per-process timers but are
+associated with a file descriptor, rather than a process.
+Because they are associated with a file descriptor, they may be passed
+to other processes, inherited across a fork, and multiplexed using
+.Xr kevent 2 ,
+.Xr poll 2 ,
+or
+.Xr select 2 .
+When a
+.Nm
+object is no longer needed, it may be disposed of using
+.Xr close 2 .
+.Pp
+The
+.Fn timerfd_create
+system call creates a
+.Nm
+object using the clock specified in the
+.Fa clockid
+argument.
+Valid values for
+.Fa clockid
+are
+.Dv CLOCK_REALTIME
+and
+.Dv CLOCK_MONOTONIC .
+The following flags define the behavior of the resulting object:
+.Bl -tag -width "TFD_NONBLOCK"
+.It Dv TFD_CLOEXEC
+This is an alias for the
+.Dv O_CLOEXEC
+flag; see
+.Xr open 2
+for more information.
+.It Dv TFD_NONBLOCK
+This is an alias for the
+.Dv O_NONBLOCK
+flag; see
+.Xr open 2
+for more information.
+.El
+.Pp
+Each time a
+.Nm
+timer expires, an internal counter is incremented.
+Reads from an
+.Nm
+object return the value of this counter in the caller's buffer as an
+unsigned 64-bit integer and reset the counter to
+.Dv 0 .
+If the value of the
+.Nm
+object's counter is
+.Dv 0 ,
+then reads will block, unless the
+.Nm
+object is set for non-blocking I/O.
+.Pp
+Writes to a
+.Nm
+object are not supported.
+.Pp
+The
+.Fn timerfd_settime
+system call sets the next expiration time of the
+.Nm
+object to the
+.Va it_value
+.Po
+see
+.Xr itimerspec 3
+.Pc
+specified in the
+.Fa tim
+argument.
+If the value is
+.Dv 0 ,
+the timer is disarmed.
+If the argument
+.Fa otim
+is not
+.Dv NULL ,
+the old timer settings are returned.
+The following flags may be specified to alter the behavior of the timer:
+.Bl -tag -width "TFD_TIMER_CANCEL_ON_SET"
+.It Dv TFD_TIMER_ABSTIME
+The specified timer value is an absolute time.
+This is equivalent to specifying
+.Dv TIMER_ABSTIME
+to
+.Xr timer_settime 2 .
+Otherwise, the time value is a relative time, equivalent to specifying
+.Dv TIMER_RELTIME
+to
+.Xr timer_settime 2 .
+.It Dv TFD_TIMER_CANCEL_ON_SET
+If the
+.Nm
+object's clock ID is
+.Dv CLOCK_REALTIME ,
+then the timer will be cancelled and its file descriptor will become
+immediately readable if the system realtime clock is set using
+.Xr clock_settime 2
+or
+.Xr settimeofday 2 .
+If the
+.Nm
+object's clock ID is not
+.Dv CLOCK_REALTIME ,
+this flag is ignored.
+.El
+.Pp
+If the
+.Va it_interval
+of the
+.Fa tim
+argument is non-zero, then the timer reloads upon expiration.
+.Pp
+The
+.Fn timerfd_gettime
+system call returns the current settings of the
+.Nm
+object in the
+.Fa tim
+argument.
+.Sh RETURN VALUES
+The
+.Fn timerfd_create
+system call returns
+.Dv -1
+if an error occurs, otherwise the return value is a descriptor representing the
+.Nm
+object.
+.Pp
+The
+.Fn timerfd_gettime
+and
+.Fn timerfd_settime
+system calls return
+.Dv 0
+upon success or
+.Dv -1
+if an error occurs.
+.Sh ERRORS
+The
+.Fn timerfd_create
+system call fails if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Flags other than
+.Dv TFD_CLOEXEC
+and
+.Dv TFD_NONBLOCK
+are set in the
+.Fa flags
+argument.
+.It Bq Er EINVAL
+The
+.Fa clockid
+argument was something other than
+.Dv CLOCK_REALTIME
+or
+.Dv CLOCK_MONOTONIC .
+.It Bq Er EMFILE
+The per-process descriptor table is full.
+.It Bq Er ENFILE
+The system file table is full.
+.El
+.Pp
+The
+.Fn timerfd_gettime
+system call fails if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The argument
+.Fa fd
+is not a valid file descriptor.
+.It Bq Er EINVAL
+The argument
+.Fa fd
+does not refer to a
+.Nm timerfd
+object.
+.It Bq Er EFAULT
+The
+.Fa tim
+argument points outside the allocated address space.
+.El
+.Pp
+The
+.Fn timerfd_settime
+system call fails if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The argument
+.Fa fd
+is not a valid file descriptor.
+.It Bq Er EINVAL
+The argument
+.Fa fd
+does not refer to a
+.Nm timerfd
+object.
+.It Bq Er EINVAL
+Bits other than the defined
+.Dv TFD_TIMER_ABSTIME
+and
+.Dv TFD_TIMER_CANCEL_ON_SET
+bits are set in the
+.Fa flags
+argument.
+.It Bq Er EINVAL
+A nanosecond field in the
+.Fa tim
+argument specified a value less than zero or greater than or equal to
+.Dv 1e9 .
+.It Bq Er EFAULT
+The
+.Fa tim
+or
+.Fa otim
+arguments point outside the allocated address space.
+.El
+.Pp
+A read from a
+.Nm
+object fails if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The size of the read buffer is less than 8 bytes
+.Pq the size required to hold an unsigned 64-bit integer .
+.It Bq Er EAGAIN
+The value of the
+.Nm
+object's expiration counter is
+.Dv 0
+and the
+.Nm
+object is set for non-blocking I/O.
+.It Bq Er ECANCELED
+The
+.Nm
+object was created with the clock ID
+.Dv CLOCK_REALTIME ,
+was configured with the
+.Dv TFD_TIMER_CANCEL_ON_SET
+flag, and the system realtime clock was changed with
+.Xr clock_settime 2
+or
+.Xr settimeofday 2 .
+.El
+.Sh SEE ALSO
+.Xr clock_settime 2 ,
+.Xr close 2 ,
+.Xr kevent 2 ,
+.Xr open 2 ,
+.Xr poll 2 ,
+.Xr read 2 ,
+.Xr select 2 ,
+.Xr settimeofday 2 ,
+.Xr timer_create 2 ,
+.Xr timer_gettime 2 ,
+.Xr timer_settime 2
+.Sh HISTORY
+The
+.Nm
+interface first appeared in
+.Nx 10 .
+It is compatible with the
+.Nm
+interface that appeared in Linux 2.6.25.

Index: src/sys/kern/sys_eventfd.c
diff -u /dev/null src/sys/kern/sys_eventfd.c:1.2
--- /dev/null	Sun Sep 19 15:51:29 2021
+++ src/sys/kern/sys_eventfd.c	Sun Sep 19 15:51:27 2021
@@ -0,0 +1,579 @@
+/*	$NetBSD: sys_eventfd.c,v 1.2 2021/09/19 15:51:27 thorpej Exp $	*/
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: sys_eventfd.c,v 1.2 2021/09/19 15:51:27 thorpej Exp $");
+
+/*
+ * eventfd
+ *
+ * Eventfd objects present a simple counting object associated with a
+ * file descriptor.  Writes and reads to this file descriptor increment
+ * and decrement the count, respectively.  When the count is non-zero,
+ * the descriptor is considered "readable", and when less than the max
+ * value (EVENTFD_MAXVAL), is considered "writable".
+ *
+ * This implementation is API compatible with the Linux eventfd(2)
+ * interface.
+ */
+
+#include <sys/types.h>
+#include <sys/condvar.h>
+#include <sys/eventfd.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/kauth.h>
+#include <sys/mutex.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/select.h>
+#include <sys/stat.h>
+#include <sys/syscallargs.h>
+#include <sys/uio.h>
+
+struct eventfd {
+	kmutex_t	efd_lock;	/* held around counter/waiter state */
+	kcondvar_t	efd_read_wait;	/* readers sleep here (efd_val == 0) */
+	kcondvar_t	efd_write_wait;	/* writers sleep here (would overflow) */
+	kcondvar_t	efd_restart_wait; /* wait for a restart drain to end */
+	struct selinfo	efd_read_sel;	/* poll/kqueue: readable side */
+	struct selinfo	efd_write_sel;	/* poll/kqueue: writable side */
+	eventfd_t	efd_val;	/* the counter itself */
+	int64_t		efd_nwaiters;	/* LWPs asleep in eventfd_wait() */
+	bool		efd_restarting;	/* set by fo_restart, cleared by last waiter */
+	bool		efd_has_read_waiters;	/* efd_read_wait needs a broadcast */
+	bool		efd_has_write_waiters;	/* efd_write_wait needs a broadcast */
+	bool		efd_is_semaphore;	/* EFD_SEMAPHORE: reads take 1 */
+
+	/*
+	 * Information kept for stat(2).
+	 */
+	struct timespec efd_btime;	/* time created */
+	struct timespec	efd_mtime;	/* last write */
+	struct timespec	efd_atime;	/* last read */
+};
+
+#define	EVENTFD_MAXVAL	(UINT64_MAX - 1)	/* largest value efd_val may hold */
+
+/*
+ * eventfd_create:
+ *
+ *	Allocate and initialize a new eventfd object whose counter
+ *	starts at val.  Only EFD_SEMAPHORE is interpreted here;
+ *	EFD_CLOEXEC and EFD_NONBLOCK are left to the caller.
+ */
+static struct eventfd *
+eventfd_create(unsigned int const val, int const flags)
+{
+	struct eventfd *efd;
+
+	efd = kmem_zalloc(sizeof(*efd), KM_SLEEP);
+
+	/* Synchronization primitives. */
+	mutex_init(&efd->efd_lock, MUTEX_DEFAULT, IPL_NONE);
+	cv_init(&efd->efd_read_wait, "efdread");
+	cv_init(&efd->efd_write_wait, "efdwrite");
+	cv_init(&efd->efd_restart_wait, "efdrstrt");
+
+	/* poll/select/kqueue bookkeeping. */
+	selinit(&efd->efd_read_sel);
+	selinit(&efd->efd_write_sel);
+
+	/* Counter state and creation timestamp. */
+	efd->efd_is_semaphore = (flags & EFD_SEMAPHORE) != 0;
+	efd->efd_val = val;
+	getnanotime(&efd->efd_btime);
+
+	return efd;
+}
+
+/*
+ * eventfd_destroy:
+ *
+ *	Destroy an eventfd object and release its memory.  The object
+ *	must be idle: no sleepers, no restart in progress.  efd must
+ *	not be used after this returns.
+ */
+static void
+eventfd_destroy(struct eventfd * const efd)
+{
+
+	KASSERT(efd->efd_nwaiters == 0);
+	KASSERT(efd->efd_restarting == false);
+	KASSERT(efd->efd_has_read_waiters == false);
+	KASSERT(efd->efd_has_write_waiters == false);
+
+	cv_destroy(&efd->efd_read_wait);
+	cv_destroy(&efd->efd_write_wait);
+	cv_destroy(&efd->efd_restart_wait);
+
+	seldestroy(&efd->efd_read_sel);
+	seldestroy(&efd->efd_write_sel);
+
+	mutex_destroy(&efd->efd_lock);
+
+	/*
+	 * The object was obtained with kmem_zalloc(sizeof(*efd)) in
+	 * eventfd_create(); give it back, otherwise every closed
+	 * eventfd leaks its backing structure.
+	 */
+	kmem_free(efd, sizeof(*efd));
+}
+
+/*
+ * eventfd_wait:
+ *
+ *	Block on an eventfd.  Handles non-blocking, as well as
+ *	the restart cases.
+ *
+ *	Returns EAGAIN immediately when FNONBLOCK is set in fflag.
+ *	Otherwise sleeps interruptibly on the read or write condvar
+ *	(selected by is_write) and returns the cv_wait_sig() result,
+ *	or ERESTART if that result was 0 and a restart is in progress
+ *	at wake-up.  The callers in this file hold efd_lock across the
+ *	call and re-check their own condition when 0 is returned.
+ */
+static int
+eventfd_wait(struct eventfd * const efd, int const fflag, bool const is_write)
+{
+	kcondvar_t *waitcv;
+	int error;
+
+	if (fflag & FNONBLOCK) {
+		return EAGAIN;
+	}
+
+	/*
+	 * We're going to block.  If there is a restart in-progress,
+	 * wait for that to complete first.
+	 */
+	while (efd->efd_restarting) {
+		cv_wait(&efd->efd_restart_wait, &efd->efd_lock);
+	}
+
+	if (is_write) {
+		efd->efd_has_write_waiters = true;
+		waitcv = &efd->efd_write_wait;
+	} else {
+		efd->efd_has_read_waiters = true;
+		waitcv = &efd->efd_read_wait;
+	}
+
+	efd->efd_nwaiters++;
+	KASSERT(efd->efd_nwaiters > 0);
+	error = cv_wait_sig(waitcv, &efd->efd_lock);
+	efd->efd_nwaiters--;
+	KASSERT(efd->efd_nwaiters >= 0);
+
+	/*
+	 * If a restart was triggered while we were asleep, we need
+	 * to return ERESTART if no other error was returned.  If we
+	 * are the last waiter coming out of the restart drain, clear
+	 * the condition.
+	 */
+	if (efd->efd_restarting) {
+		if (error == 0) {
+			error = ERESTART;
+		}
+		if (efd->efd_nwaiters == 0) {
+			efd->efd_restarting = false;
+			cv_broadcast(&efd->efd_restart_wait);
+		}
+	}
+
+	return error;
+}
+
+/*
+ * eventfd_wake:
+ *
+ *	Wake LWPs blocked on an eventfd.  A write (is_write == true)
+ *	wakes the read side; a read wakes the write side.  The matching
+ *	selinfo is always notified with NOTE_SUBMIT.
+ */
+static void
+eventfd_wake(struct eventfd * const efd, bool const is_write)
+{
+	kcondvar_t *cv;
+	bool *has_waiters;
+	struct selinfo *sel;
+	int pollev;
+
+	/* Pick the side opposite to the operation that just completed. */
+	if (is_write) {
+		cv = &efd->efd_read_wait;
+		has_waiters = &efd->efd_has_read_waiters;
+		sel = &efd->efd_read_sel;
+		pollev = POLLIN | POLLRDNORM;
+	} else {
+		cv = &efd->efd_write_wait;
+		has_waiters = &efd->efd_has_write_waiters;
+		sel = &efd->efd_write_sel;
+		pollev = POLLOUT | POLLWRNORM;
+	}
+
+	/* Only broadcast when somebody registered as a sleeper. */
+	if (*has_waiters) {
+		*has_waiters = false;
+		cv_broadcast(cv);
+	}
+
+	selnotify(sel, pollev, NOTE_SUBMIT);
+}
+
+/*
+ * eventfd file operations
+ */
+
+/*
+ * eventfd_fop_read:
+ *
+ *	fo_read handler.  Waits (via eventfd_wait()) until the counter
+ *	is non-zero, then takes either 1 (semaphore mode) or the whole
+ *	counter, wakes the write side, and copies the taken value out.
+ *	Returns EINVAL if the buffer is smaller than an eventfd_t.
+ */
+static int
+eventfd_fop_read(file_t * const fp, off_t * const offset,
+    struct uio * const uio, kauth_cred_t const cred, int const flags)
+{
+	struct eventfd * const efd = fp->f_eventfd;
+	int const fflag = fp->f_flag;
+	eventfd_t taken;
+	int error;
+
+	if (uio->uio_resid < sizeof(eventfd_t))
+		return EINVAL;
+
+	mutex_enter(&efd->efd_lock);
+
+	/* Sleep until there is something to consume. */
+	for (;;) {
+		if (efd->efd_val != 0)
+			break;
+		error = eventfd_wait(efd, fflag, false);
+		if (error != 0) {
+			mutex_exit(&efd->efd_lock);
+			return error;
+		}
+	}
+
+	if (!efd->efd_is_semaphore) {
+		taken = efd->efd_val;
+		efd->efd_val = 0;
+	} else {
+		taken = 1;
+		efd->efd_val--;
+	}
+
+	getnanotime(&efd->efd_atime);
+	eventfd_wake(efd, false);
+
+	mutex_exit(&efd->efd_lock);
+
+	/* Copy out after dropping the lock. */
+	return uiomove(&taken, sizeof(taken), uio);
+}
+
+/*
+ * eventfd_fop_write:
+ *
+ *	fo_write handler.  Copies in one eventfd_t and adds it to the
+ *	counter, waiting (via eventfd_wait()) while the sum would exceed
+ *	EVENTFD_MAXVAL.  Returns EINVAL if the buffer is smaller than an
+ *	eventfd_t or the value itself exceeds EVENTFD_MAXVAL.
+ */
+static int
+eventfd_fop_write(file_t * const fp, off_t * const offset,
+    struct uio * const uio, kauth_cred_t const cred, int const flags)
+{
+	struct eventfd * const efd = fp->f_eventfd;
+	int const fflag = fp->f_flag;
+	eventfd_t addend;
+	int error;
+
+	if (uio->uio_resid < sizeof(eventfd_t))
+		return EINVAL;
+
+	error = uiomove(&addend, sizeof(addend), uio);
+	if (error != 0)
+		return error;
+
+	if (addend > EVENTFD_MAXVAL) {
+		error = EINVAL;
+	} else {
+		mutex_enter(&efd->efd_lock);
+
+		KASSERT(efd->efd_val <= EVENTFD_MAXVAL);
+
+		/* Sleep until the addition fits below the maximum. */
+		while (error == 0 &&
+		    (EVENTFD_MAXVAL - efd->efd_val) < addend) {
+			error = eventfd_wait(efd, fflag, true);
+		}
+
+		if (error == 0) {
+			efd->efd_val += addend;
+			KASSERT(efd->efd_val <= EVENTFD_MAXVAL);
+
+			getnanotime(&efd->efd_mtime);
+			eventfd_wake(efd, true);
+		}
+
+		mutex_exit(&efd->efd_lock);
+	}
+
+	if (error != 0) {
+		/*
+		 * Undo the effect of uiomove() so that the error
+		 * gets reported correctly; see dofilewrite().
+		 */
+		uio->uio_resid += sizeof(addend);
+	}
+	return error;
+}
+
+/*
+ * eventfd_fop_poll:
+ *
+ *	fo_poll handler.  Readable while the counter is non-zero,
+ *	writable while it is below EVENTFD_MAXVAL.  For each requested
+ *	direction that is not ready, the caller is recorded on the
+ *	matching selinfo.
+ */
+static int
+eventfd_fop_poll(file_t * const fp, int const events)
+{
+	struct eventfd * const efd = fp->f_eventfd;
+	int const rd_events = events & (POLLIN | POLLRDNORM);
+	int const wr_events = events & (POLLOUT | POLLWRNORM);
+	int revents = 0;
+
+	/*
+	 * Linux reports POLLERR when the eventfd count overflows, but
+	 * that cannot happen through the normal read/write API, only
+	 * through Linux kernel-internal interfaces, so POLLERR is never
+	 * returned here.
+	 *
+	 * The Linux eventfd(2) man page does not specifically discuss
+	 * POLLRDNORM, but it is checked here in addition to POLLIN.
+	 */
+
+	mutex_enter(&efd->efd_lock);
+
+	if (rd_events != 0) {
+		if (efd->efd_val == 0)
+			selrecord(curlwp, &efd->efd_read_sel);
+		else
+			revents |= rd_events;
+	}
+
+	if (wr_events != 0) {
+		if (efd->efd_val >= EVENTFD_MAXVAL)
+			selrecord(curlwp, &efd->efd_write_sel);
+		else
+			revents |= wr_events;
+	}
+
+	mutex_exit(&efd->efd_lock);
+
+	return revents;
+}
+
+/*
+ * eventfd_fop_stat:
+ *
+ *	fo_stat handler.  Fills *st from the eventfd: the counter is
+ *	reported as st_size, the recorded create/read/write times as
+ *	the timestamps, and the file's credentials as owner.  Always
+ *	returns 0.
+ */
+static int
+eventfd_fop_stat(file_t * const fp, struct stat * const st)
+{
+	struct eventfd * const efd = fp->f_eventfd;
+
+	memset(st, 0, sizeof(*st));
+
+	mutex_enter(&efd->efd_lock);
+
+	/* Identity and ownership. */
+	st->st_mode = S_IFIFO | S_IRUSR | S_IWUSR;
+	st->st_uid = kauth_cred_geteuid(fp->f_cred);
+	st->st_gid = kauth_cred_getegid(fp->f_cred);
+
+	/* Size information. */
+	st->st_size = (off_t)efd->efd_val;
+	st->st_blksize = sizeof(eventfd_t);
+	st->st_blocks = 1;
+
+	/* Timestamps; ctime mirrors the creation time. */
+	st->st_ctimespec = efd->efd_btime;
+	st->st_birthtimespec = efd->efd_btime;
+	st->st_atimespec = efd->efd_atime;
+	st->st_mtimespec = efd->efd_mtime;
+
+	mutex_exit(&efd->efd_lock);
+
+	return 0;
+}
+
+/*
+ * eventfd_fop_close:
+ *
+ *	fo_close handler.  Detaches the eventfd from the file and
+ *	hands it to eventfd_destroy().  Always returns 0.
+ */
+static int
+eventfd_fop_close(file_t * const fp)
+{
+	struct eventfd * const dying = fp->f_eventfd;
+
+	/* Clear the back-pointer before tearing the object down. */
+	fp->f_eventfd = NULL;
+
+	eventfd_destroy(dying);
+	return 0;
+}
+
+/*
+ * eventfd_filt_read_detach:
+ *
+ *	EVFILT_READ detach: remove the knote from the read-side selinfo.
+ */
+static void
+eventfd_filt_read_detach(struct knote * const kn)
+{
+	file_t * const fp = (file_t *)kn->kn_obj;
+	struct eventfd * const efd = fp->f_eventfd;
+
+	mutex_enter(&efd->efd_lock);
+	KASSERT(kn->kn_hook == efd);
+	selremove_knote(&efd->efd_read_sel, kn);
+	mutex_exit(&efd->efd_lock);
+}
+
+/*
+ * eventfd_filt_read:
+ *
+ *	EVFILT_READ event: store the current counter in kn_data and
+ *	report the knote active when it is non-zero.  With NOTE_SUBMIT
+ *	in hint the caller already holds efd_lock; otherwise it is
+ *	taken here.
+ */
+static int
+eventfd_filt_read(struct knote * const kn, long const hint)
+{
+	file_t * const fp = (file_t *)kn->kn_obj;
+	struct eventfd * const efd = fp->f_eventfd;
+	bool const caller_locked = (hint & NOTE_SUBMIT) != 0;
+
+	if (caller_locked)
+		KASSERT(mutex_owned(&efd->efd_lock));
+	else
+		mutex_enter(&efd->efd_lock);
+
+	kn->kn_data = (int64_t)efd->efd_val;
+
+	if (!caller_locked)
+		mutex_exit(&efd->efd_lock);
+
+	return (eventfd_t)kn->kn_data > 0;
+}
+
+static const struct filterops eventfd_read_filterops = {
+	.f_isfd = 1,
+	.f_detach = eventfd_filt_read_detach,	/* unhook from efd_read_sel */
+	.f_event = eventfd_filt_read,		/* active while efd_val > 0 */
+};
+
+/*
+ * eventfd_filt_write_detach:
+ *
+ *	EVFILT_WRITE detach: remove the knote from the write-side
+ *	selinfo.
+ */
+static void
+eventfd_filt_write_detach(struct knote * const kn)
+{
+	file_t * const fp = (file_t *)kn->kn_obj;
+	struct eventfd * const efd = fp->f_eventfd;
+
+	mutex_enter(&efd->efd_lock);
+	KASSERT(kn->kn_hook == efd);
+	selremove_knote(&efd->efd_write_sel, kn);
+	mutex_exit(&efd->efd_lock);
+}
+
+/*
+ * eventfd_filt_write:
+ *
+ *	EVFILT_WRITE event: store the current counter in kn_data and
+ *	report the knote active while it is below EVENTFD_MAXVAL.  With
+ *	NOTE_SUBMIT in hint the caller already holds efd_lock; otherwise
+ *	it is taken here.
+ */
+static int
+eventfd_filt_write(struct knote * const kn, long const hint)
+{
+	file_t * const fp = (file_t *)kn->kn_obj;
+	struct eventfd * const efd = fp->f_eventfd;
+	bool const caller_locked = (hint & NOTE_SUBMIT) != 0;
+
+	if (caller_locked)
+		KASSERT(mutex_owned(&efd->efd_lock));
+	else
+		mutex_enter(&efd->efd_lock);
+
+	kn->kn_data = (int64_t)efd->efd_val;
+
+	if (!caller_locked)
+		mutex_exit(&efd->efd_lock);
+
+	return (eventfd_t)kn->kn_data < EVENTFD_MAXVAL;
+}
+
+static const struct filterops eventfd_write_filterops = {
+	.f_isfd = 1,
+	.f_detach = eventfd_filt_write_detach,	/* unhook from efd_write_sel */
+	.f_event = eventfd_filt_write,		/* active while efd_val < max */
+};
+
+/*
+ * eventfd_fop_kqfilter:
+ *
+ *	fo_kqfilter handler.  Attaches the knote to the read or write
+ *	selinfo for EVFILT_READ / EVFILT_WRITE; any other filter yields
+ *	EINVAL.
+ */
+static int
+eventfd_fop_kqfilter(file_t * const fp, struct knote * const kn)
+{
+	struct eventfd * const efd = ((file_t *)kn->kn_obj)->f_eventfd;
+	struct selinfo *target;
+
+	if (kn->kn_filter == EVFILT_READ) {
+		kn->kn_fop = &eventfd_read_filterops;
+		target = &efd->efd_read_sel;
+	} else if (kn->kn_filter == EVFILT_WRITE) {
+		kn->kn_fop = &eventfd_write_filterops;
+		target = &efd->efd_write_sel;
+	} else {
+		return EINVAL;
+	}
+
+	/* The detach routines assert this hook matches the eventfd. */
+	kn->kn_hook = efd;
+
+	mutex_enter(&efd->efd_lock);
+	selrecord_knote(target, kn);
+	mutex_exit(&efd->efd_lock);
+
+	return 0;
+}
+
+/*
+ * eventfd_fop_restart:
+ *
+ *	fo_restart handler.  Unblocks blocked reads/writes in order to
+ *	allow close() to complete; the woken system calls return
+ *	ERESTART so that the fd is revalidated.
+ */
+static void
+eventfd_fop_restart(file_t * const fp)
+{
+	struct eventfd * const efd = fp->f_eventfd;
+
+	mutex_enter(&efd->efd_lock);
+
+	/* Nobody is asleep: nothing to drain. */
+	if (efd->efd_nwaiters == 0) {
+		mutex_exit(&efd->efd_lock);
+		return;
+	}
+
+	/* The last waiter out of eventfd_wait() clears this again. */
+	efd->efd_restarting = true;
+
+	if (efd->efd_has_read_waiters) {
+		efd->efd_has_read_waiters = false;
+		cv_broadcast(&efd->efd_read_wait);
+	}
+	if (efd->efd_has_write_waiters) {
+		efd->efd_has_write_waiters = false;
+		cv_broadcast(&efd->efd_write_wait);
+	}
+
+	mutex_exit(&efd->efd_lock);
+}
+
+static const struct fileops eventfd_fileops = {
+	.fo_name = "eventfd",
+	.fo_read = eventfd_fop_read,		/* consume the counter */
+	.fo_write = eventfd_fop_write,		/* add to the counter */
+	.fo_ioctl = fbadop_ioctl,		/* generic handler; no eventfd ioctls here */
+	.fo_fcntl = fnullop_fcntl,		/* generic handler; no eventfd fcntls here */
+	.fo_poll = eventfd_fop_poll,		/* readable/writable state */
+	.fo_stat = eventfd_fop_stat,		/* counter as st_size + times */
+	.fo_close = eventfd_fop_close,		/* tear down the object */
+	.fo_kqfilter = eventfd_fop_kqfilter,	/* EVFILT_READ / EVFILT_WRITE */
+	.fo_restart = eventfd_fop_restart,	/* kick sleepers with ERESTART */
+};
+
+/*
+ * eventfd(2) system call
+ */
+/*
+ * do_eventfd:
+ *
+ *	Common back-end for eventfd creation.  Rejects unknown flag
+ *	bits with EINVAL, allocates a file/descriptor pair, attaches a
+ *	new eventfd with initial counter val, and stores the descriptor
+ *	number in *retval.  Returns 0 or an errno from fd_allocfile().
+ */
+int
+do_eventfd(struct lwp * const l, unsigned int const val, int const flags,
+    register_t *retval)
+{
+	int const known_flags = EFD_CLOEXEC | EFD_NONBLOCK | EFD_SEMAPHORE;
+	file_t *fp;
+	int fd, error;
+
+	if ((flags & ~known_flags) != 0)
+		return EINVAL;
+
+	error = fd_allocfile(&fp, &fd);
+	if (error != 0)
+		return error;
+
+	/* Always read/write; non-blocking only on request. */
+	fp->f_flag = FREAD | FWRITE |
+	    ((flags & EFD_NONBLOCK) ? FNONBLOCK : 0);
+	fp->f_type = DTYPE_EVENTFD;
+	fp->f_ops = &eventfd_fileops;
+	fp->f_eventfd = eventfd_create(val, flags);
+
+	fd_set_exclose(l, fd, (flags & EFD_CLOEXEC) != 0);
+	fd_affix(curproc, fp, fd);
+
+	*retval = fd;
+	return 0;
+}
+
+/*
+ * sys_eventfd:
+ *
+ *	System call entry point: unpack the arguments and defer to
+ *	do_eventfd().
+ */
+int
+sys_eventfd(struct lwp *l, const struct sys_eventfd_args *uap,
+    register_t *retval)
+{
+	/* {
+		syscallarg(unsigned int) val;
+		syscallarg(int) flags;
+	} */
+	unsigned int const initval = SCARG(uap, val);
+	int const efd_flags = SCARG(uap, flags);
+
+	return do_eventfd(l, initval, efd_flags, retval);
+}
Index: src/sys/kern/sys_timerfd.c
diff -u /dev/null src/sys/kern/sys_timerfd.c:1.2
--- /dev/null	Sun Sep 19 15:51:29 2021
+++ src/sys/kern/sys_timerfd.c	Sun Sep 19 15:51:27 2021
@@ -0,0 +1,691 @@
+/*	$NetBSD: sys_timerfd.c,v 1.2 2021/09/19 15:51:27 thorpej Exp $	*/
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: sys_timerfd.c,v 1.2 2021/09/19 15:51:27 thorpej Exp $");
+
+/*
+ * timerfd
+ *
+ * Timerfd objects are similar to POSIX timers, except they are associated
+ * with a file descriptor rather than a process.  Timerfd objects are
+ * created with the timerfd_create(2) system call, similar to timer_create(2).
+ * The timerfd analogues for timer_gettime(2) and timer_settime(2) are
+ * timerfd_gettime(2) and timerfd_settime(2), respectively.
+ *
+ * When a timerfd object's timer fires, an internal counter is incremented.
+ * When this counter is non-zero, the descriptor associated with the timerfd
+ * object is "readable".  Note that this is slightly different than the
+ * POSIX timer "overrun" counter, which only increments if the timer fires
+ * again while the notification signal is already pending.  Thus, we are
+ * responsible for incrementing the "overrun" counter each time the timerfd
+ * timer fires.
+ *
+ * This implementation is API compatible with the Linux timerfd interface.
+ */
+
+#include <sys/types.h>
+#include <sys/condvar.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/kauth.h>
+#include <sys/mutex.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/select.h>
+#include <sys/stat.h>
+#include <sys/syscallargs.h>
+#include <sys/timerfd.h>
+#include <sys/uio.h>
+
+/*
+ * Per-descriptor timerfd state; one is allocated by timerfd_create()
+ * and hung off the file's f_timerfd.
+ *
+ * N.B. all timerfd state is protected by itimer_lock()
+ */
+struct timerfd {
+	struct itimer	tfd_itimer;	/* timer; it_overruns = fire count */
+	kcondvar_t	tfd_read_wait;	/* blocked readers sleep here */
+	kcondvar_t	tfd_restart_wait; /* sleepers awaiting restart drain */
+	struct selinfo	tfd_read_sel;	/* poll/kqueue read waiters */
+	int64_t		tfd_nwaiters;	/* LWPs asleep on tfd_read_wait */
+	bool		tfd_cancel_on_set; /* TFD_TIMER_CANCEL_ON_SET given */
+	bool		tfd_cancelled;	/* set when realtime clock changed */
+	bool		tfd_restarting;	/* set by timerfd_fop_restart() */
+
+	/*
+	 * Information kept for stat(2).
+	 */
+	struct timespec tfd_btime;	/* time created */
+	struct timespec	tfd_mtime;	/* last timerfd_settime() */
+	struct timespec	tfd_atime;	/* last read */
+};
+
+static void	timerfd_wake(struct timerfd *);
+
+/*
+ * Return the number of expirations recorded so far, i.e. the timer's
+ * it_overruns reinterpreted as unsigned and widened to 64 bits.
+ */
+static inline uint64_t
+timerfd_fire_count(const struct timerfd * const tfd)
+{
+	const unsigned int nfired = (unsigned int)tfd->tfd_itimer.it_overruns;
+
+	return nfired;
+}
+
+/*
+ * A timerfd is readable once it has fired at least once, or once it
+ * has been marked cancelled.
+ */
+static inline bool
+timerfd_is_readable(const struct timerfd * const tfd)
+{
+	if (tfd->tfd_itimer.it_overruns != 0) {
+		return true;
+	}
+	return tfd->tfd_cancelled;
+}
+
+/*
+ * timerfd_fire:
+ *
+ *	itimer "fire" hook: account for one more expiration and wake
+ *	anybody waiting for the timerfd to become readable.
+ *
+ *	Called from a callout with itimer lock held.
+ */
+static void
+timerfd_fire(struct itimer * const it)
+{
+
+	it->it_overruns += 1;
+
+	/* The itimer is embedded in the timerfd; recover the container. */
+	timerfd_wake(container_of(it, struct timerfd, tfd_itimer));
+}
+
+/*
+ * timerfd_realtime_changed:
+ *
+ *	itimer hook run when CLOCK_REALTIME is stepped with
+ *	clock_settime() or settimeofday().  If the timer was armed with
+ *	cancel-on-set, flag it as cancelled and wake the waiters.
+ *
+ *	Called with itimer lock held.
+ */
+static void
+timerfd_realtime_changed(struct itimer * const it)
+{
+	struct timerfd * const tfd =
+	    container_of(it, struct timerfd, tfd_itimer);
+
+	/* Should only be called when timer is armed. */
+	KASSERT(timespecisset(&it->it_time.it_value));
+
+	if (!tfd->tfd_cancel_on_set) {
+		return;
+	}
+
+	tfd->tfd_cancelled = true;
+	timerfd_wake(tfd);
+}
+
+/*
+ * itimer hooks for CLOCK_MONOTONIC timerfds (selected in
+ * timerfd_create()): only the fire hook is provided.
+ */
+static const struct itimer_ops timerfd_itimer_monotonic_ops = {
+	.ito_fire = timerfd_fire,
+};
+
+/*
+ * itimer hooks for CLOCK_REALTIME timerfds (selected in
+ * timerfd_create()): in addition to the fire hook, these timers are
+ * notified of realtime clock changes so cancel-on-set can be honoured.
+ */
+static const struct itimer_ops timerfd_itimer_realtime_ops = {
+	.ito_fire = timerfd_fire,
+	.ito_realtime_changed = timerfd_realtime_changed,
+};
+
+/*
+ * timerfd_create:
+ *
+ *	Allocate and initialize a timerfd object bound to clock_id,
+ *	which must be CLOCK_REALTIME or CLOCK_MONOTONIC.  `flags' is
+ *	not examined here.
+ */
+static struct timerfd *
+timerfd_create(clockid_t const clock_id, int const flags)
+{
+	struct timerfd * const tfd = kmem_zalloc(sizeof(*tfd), KM_SLEEP);
+	const struct itimer_ops *ops;
+
+	KASSERT(clock_id == CLOCK_REALTIME || clock_id == CLOCK_MONOTONIC);
+
+	/* Only realtime timers need to hear about clock changes. */
+	if (clock_id == CLOCK_REALTIME) {
+		ops = &timerfd_itimer_realtime_ops;
+	} else {
+		ops = &timerfd_itimer_monotonic_ops;
+	}
+
+	getnanotime(&tfd->tfd_btime);
+	selinit(&tfd->tfd_read_sel);
+	cv_init(&tfd->tfd_read_wait, "tfdread");
+	cv_init(&tfd->tfd_restart_wait, "tfdrstrt");
+
+	/* Caller deals with TFD_CLOEXEC and TFD_NONBLOCK. */
+
+	itimer_lock();
+	itimer_init(&tfd->tfd_itimer, ops, clock_id, NULL);
+	itimer_unlock();
+
+	return tfd;
+}
+
+/*
+ * timerfd_destroy:
+ *
+ *	Tear down a timerfd object and release its memory.  There must
+ *	be no waiters and no restart in progress.
+ */
+static void
+timerfd_destroy(struct timerfd * const tfd)
+{
+	struct itimer * const it = &tfd->tfd_itimer;
+
+	KASSERT(tfd->tfd_nwaiters == 0);
+	KASSERT(tfd->tfd_restarting == false);
+
+	/* Retire the timer first; itimer_fini() returns with the lock dropped. */
+	itimer_lock();
+	itimer_poison(it);
+	itimer_fini(it);	/* drops itimer lock */
+
+	/* Then dispose of the wait channels. */
+	seldestroy(&tfd->tfd_read_sel);
+	cv_destroy(&tfd->tfd_restart_wait);
+	cv_destroy(&tfd->tfd_read_wait);
+
+	kmem_free(tfd, sizeof(*tfd));
+}
+
+/*
+ * timerfd_wait:
+ *
+ *	Block on a timerfd.  Handles non-blocking, as well as
+ *	the restart cases.
+ *
+ *	Sleeps on itimer_mutex (presumably the mutex behind
+ *	itimer_lock(); timerfd_fop_read() calls this between
+ *	itimer_lock() and itimer_unlock()).
+ *
+ *	Returns EAGAIN at once if FNONBLOCK is set in fflag.  Otherwise
+ *	returns the result of cv_wait_sig(), except that a zero result
+ *	is replaced by ERESTART when tfd_restarting is set on wakeup.
+ *	A return of 0 only means the LWP was awakened; the caller
+ *	re-checks whether the timerfd is readable.
+ */
+static int
+timerfd_wait(struct timerfd * const tfd, int const fflag)
+{
+	extern kmutex_t	itimer_mutex;	/* XXX */
+	int error;
+
+	if (fflag & FNONBLOCK) {
+		return EAGAIN;
+	}
+
+	/*
+	 * We're going to block.  If there is a restart in-progress,
+	 * wait for that to complete first.
+	 */
+	while (tfd->tfd_restarting) {
+		cv_wait(&tfd->tfd_restart_wait, &itimer_mutex);
+	}
+
+	/* Count ourselves as a waiter for the duration of the sleep. */
+	tfd->tfd_nwaiters++;
+	KASSERT(tfd->tfd_nwaiters > 0);
+	error = cv_wait_sig(&tfd->tfd_read_wait, &itimer_mutex);
+	tfd->tfd_nwaiters--;
+	KASSERT(tfd->tfd_nwaiters >= 0);
+
+	/*
+	 * If a restart was triggered while we were asleep, we need
+	 * to return ERESTART if no other error was returned.  If we
+	 * are the last waiter coming out of the restart drain, clear
+	 * the condition.
+	 */
+	if (tfd->tfd_restarting) {
+		if (error == 0) {
+			error = ERESTART;
+		}
+		if (tfd->tfd_nwaiters == 0) {
+			tfd->tfd_restarting = false;
+			/* Release anyone parked in the loop above. */
+			cv_broadcast(&tfd->tfd_restart_wait);
+		}
+	}
+
+	return error;
+}
+
+/*
+ * timerfd_wake:
+ *
+ *	Wake everything waiting on a timerfd: LWPs sleeping in read
+ *	(only if any are counted) and poll/kqueue waiters.
+ */
+static void
+timerfd_wake(struct timerfd * const tfd)
+{
+	const bool have_sleepers = tfd->tfd_nwaiters != 0;
+
+	if (have_sleepers) {
+		cv_broadcast(&tfd->tfd_read_wait);
+	}
+
+	selnotify(&tfd->tfd_read_sel, POLLIN | POLLRDNORM, NOTE_SUBMIT);
+}
+
+/*
+ * timerfd file operations
+ */
+
+/*
+ * fo_read: hand the expiration count to the caller as a uint64_t and
+ * reset it to zero.
+ *
+ * Returns EINVAL if the buffer cannot hold a uint64_t, ECANCELED if
+ * the timerfd is marked cancelled, any error from timerfd_wait()
+ * (e.g. EAGAIN for non-blocking files), or the result of uiomove().
+ */
+static int
+timerfd_fop_read(file_t * const fp, off_t * const offset,
+    struct uio * const uio, kauth_cred_t const cred, int const flags)
+{
+	struct timerfd * const tfd = fp->f_timerfd;
+	struct itimer * const it = &tfd->tfd_itimer;
+	int const fflag = fp->f_flag;
+	uint64_t nfired;
+	int error;
+
+	if (uio->uio_resid < sizeof(uint64_t)) {
+		return EINVAL;
+	}
+
+	itimer_lock();
+
+	/* Sleep until there is something to report. */
+	for (;;) {
+		if (timerfd_is_readable(tfd)) {
+			break;
+		}
+		error = timerfd_wait(tfd, fflag);
+		if (error != 0) {
+			itimer_unlock();
+			return error;
+		}
+	}
+
+	/* Cancellation takes precedence over any pending count. */
+	if (tfd->tfd_cancelled) {
+		itimer_unlock();
+		return ECANCELED;
+	}
+
+	/* Consume the count and remember the access time for stat(2). */
+	nfired = timerfd_fire_count(tfd);
+	it->it_overruns = 0;
+	getnanotime(&tfd->tfd_atime);
+
+	itimer_unlock();
+
+	return uiomove(&nfired, sizeof(nfired), uio);
+}
+
+/*
+ * fo_ioctl: timerfd-specific ioctls.
+ *
+ *	TFD_IOC_SET_TICKS	Replace the expiration counter with the
+ *				uint64_t pointed to by `data'.  Values
+ *				above INT_MAX are rejected with EINVAL
+ *				because the counter is kept in an int.
+ *
+ * Any other command yields EPASSTHROUGH.
+ */
+static int
+timerfd_fop_ioctl(file_t * const fp, unsigned long const cmd, void * const data)
+{
+	struct timerfd * const tfd = fp->f_timerfd;
+	int error = 0;
+
+	switch (cmd) {
+	case TFD_IOC_SET_TICKS: {
+		const uint64_t * const new_ticksp = data;
+		if (*new_ticksp > INT_MAX) {
+			return EINVAL;
+		}
+		itimer_lock();
+		tfd->tfd_itimer.it_overruns = (int)*new_ticksp;
+		/*
+		 * A non-zero count makes the descriptor readable, so
+		 * blocked readers and poll/kqueue waiters must be told;
+		 * otherwise they would sleep until the timer next fires
+		 * (or forever, if it is disarmed).
+		 */
+		if (tfd->tfd_itimer.it_overruns != 0) {
+			timerfd_wake(tfd);
+		}
+		itimer_unlock();
+		break;
+	    }
+
+	default:
+		error = EPASSTHROUGH;
+	}
+
+	return error;
+}
+
+/*
+ * fo_poll: write events are reported as always ready; read events are
+ * ready once the timerfd is readable, otherwise the caller is
+ * recorded for a later selnotify().
+ */
+static int
+timerfd_fop_poll(file_t * const fp, int const events)
+{
+	struct timerfd * const tfd = fp->f_timerfd;
+	const int rd_events = events & (POLLIN | POLLRDNORM);
+	int revents = events & (POLLOUT | POLLWRNORM);
+
+	/* Nothing read-related requested: no need to take the lock. */
+	if (rd_events == 0) {
+		return revents;
+	}
+
+	itimer_lock();
+	if (!timerfd_is_readable(tfd)) {
+		selrecord(curlwp, &tfd->tfd_read_sel);
+	} else {
+		revents |= rd_events;
+	}
+	itimer_unlock();
+
+	return revents;
+}
+
+/*
+ * fo_stat: synthesize a struct stat for a timerfd.  st_size carries
+ * the current expiration count; the timestamps come from the values
+ * recorded at creation, last settime and last read.  Always succeeds.
+ */
+static int
+timerfd_fop_stat(file_t * const fp, struct stat * const st)
+{
+	struct timerfd * const tfd = fp->f_timerfd;
+
+	memset(st, 0, sizeof(*st));
+
+	/* Fields that never change after creation. */
+	st->st_mode = S_IFIFO | S_IRUSR | S_IWUSR;
+	st->st_blksize = sizeof(uint64_t);
+	st->st_blocks = 1;
+	st->st_uid = kauth_cred_geteuid(fp->f_cred);
+	st->st_gid = kauth_cred_getegid(fp->f_cred);
+	st->st_birthtimespec = tfd->tfd_btime;
+
+	/* Mutable state: sample it under the itimer lock. */
+	itimer_lock();
+	st->st_atimespec = tfd->tfd_atime;
+	st->st_mtimespec = tfd->tfd_mtime;
+	st->st_size = (off_t)timerfd_fire_count(tfd);
+	itimer_unlock();
+
+	/* ctime simply mirrors mtime. */
+	st->st_ctimespec = st->st_mtimespec;
+
+	return 0;
+}
+
+/*
+ * fo_close: detach the timerfd object from the file and destroy it.
+ * Always returns 0.
+ */
+static int
+timerfd_fop_close(file_t * const fp)
+{
+	struct timerfd * const doomed = fp->f_timerfd;
+
+	/* Clear the back-pointer before the object goes away. */
+	fp->f_timerfd = NULL;
+
+	timerfd_destroy(doomed);
+	return 0;
+}
+
+/*
+ * EVFILT_READ detach: unhook the knote from the timerfd's read
+ * selinfo, under the itimer lock.
+ */
+static void
+timerfd_filt_read_detach(struct knote * const kn)
+{
+	file_t * const fp = kn->kn_obj;
+	struct timerfd * const tfd = fp->f_timerfd;
+
+	itimer_lock();
+	KASSERT(kn->kn_hook == tfd);
+	selremove_knote(&tfd->tfd_read_sel, kn);
+	itimer_unlock();
+}
+
+/*
+ * EVFILT_READ event filter.
+ *
+ *	Stores the current expiration count in kn_data and returns
+ *	non-zero when the timerfd is readable.  Readability is decided
+ *	by timerfd_is_readable(), the same predicate used by
+ *	timerfd_fop_poll() and timerfd_fop_read(), so a cancelled
+ *	timerfd (which read reports as ECANCELED) triggers the knote
+ *	even when the count is zero.
+ *
+ *	With NOTE_SUBMIT in `hint' the caller already holds the itimer
+ *	lock; otherwise it is taken here.  The result is computed while
+ *	the lock is held.
+ */
+static int
+timerfd_filt_read(struct knote * const kn, long const hint)
+{
+	struct timerfd * const tfd = ((file_t *)kn->kn_obj)->f_timerfd;
+	int rv;
+
+	if (hint & NOTE_SUBMIT) {
+		KASSERT(itimer_lock_held());
+	} else {
+		itimer_lock();
+	}
+
+	kn->kn_data = (int64_t)timerfd_fire_count(tfd);
+	rv = timerfd_is_readable(tfd);
+
+	if ((hint & NOTE_SUBMIT) == 0) {
+		itimer_unlock();
+	}
+
+	return rv;
+}
+
+/*
+ * kqueue filter operations for EVFILT_READ on a timerfd; installed on
+ * the knote by timerfd_fop_kqfilter().
+ */
+static const struct filterops timerfd_read_filterops = {
+	.f_isfd = 1,
+	.f_detach = timerfd_filt_read_detach,
+	.f_event = timerfd_filt_read,
+};
+
+/*
+ * fo_kqfilter: attach a knote to the timerfd.  Only EVFILT_READ is
+ * supported; any other filter yields EINVAL.
+ */
+static int
+timerfd_fop_kqfilter(file_t * const fp, struct knote * const kn)
+{
+	struct timerfd * const tfd = ((file_t *)kn->kn_obj)->f_timerfd;
+
+	if (kn->kn_filter != EVFILT_READ) {
+		return EINVAL;
+	}
+
+	kn->kn_fop = &timerfd_read_filterops;
+	kn->kn_hook = tfd;
+
+	/* Hook the knote onto the read selinfo under the itimer lock. */
+	itimer_lock();
+	selrecord_knote(&tfd->tfd_read_sel, kn);
+	itimer_unlock();
+
+	return 0;
+}
+
+/*
+ * fo_restart: kick sleeping readers out of the kernel.
+ *
+ * Unblock blocked reads in order to allow close() to complete.
+ * System calls return ERESTART so that the fd is revalidated.
+ */
+static void
+timerfd_fop_restart(file_t * const fp)
+{
+	struct timerfd * const tfd = fp->f_timerfd;
+
+	itimer_lock();
+
+	/* Only start a restart drain if somebody is actually asleep. */
+	const bool have_sleepers = tfd->tfd_nwaiters != 0;
+	if (have_sleepers) {
+		tfd->tfd_restarting = true;
+		cv_broadcast(&tfd->tfd_read_wait);
+	}
+
+	itimer_unlock();
+}
+
+/*
+ * File operations vector installed on every timerfd file by
+ * do_timerfd_create().  It also serves as the type tag checked by
+ * do_timerfd_gettime() and do_timerfd_settime().  Writes are routed
+ * to the generic fbadop_write stub.
+ */
+static const struct fileops timerfd_fileops = {
+	.fo_name = "timerfd",
+	.fo_read = timerfd_fop_read,
+	.fo_write = fbadop_write,
+	.fo_ioctl = timerfd_fop_ioctl,
+	.fo_fcntl = fnullop_fcntl,
+	.fo_poll = timerfd_fop_poll,
+	.fo_stat = timerfd_fop_stat,
+	.fo_close = timerfd_fop_close,
+	.fo_kqfilter = timerfd_fop_kqfilter,
+	.fo_restart = timerfd_fop_restart,
+};
+
+/*
+ * timerfd_create(2) system call
+ *
+ *	Allocate a read-only descriptor backed by a new timerfd object
+ *	on clock_id and publish it in the current process.  The new
+ *	descriptor number is stored in *retval.
+ *
+ *	Returns EINVAL for flags other than TFD_CLOEXEC / TFD_NONBLOCK
+ *	or for a clock other than CLOCK_REALTIME / CLOCK_MONOTONIC, or
+ *	whatever error fd_allocfile() reports.
+ */
+int
+do_timerfd_create(struct lwp * const l, clockid_t const clock_id,
+    int const flags, register_t *retval)
+{
+	const bool clock_ok =
+	    clock_id == CLOCK_REALTIME || clock_id == CLOCK_MONOTONIC;
+	file_t *fp;
+	int fd;
+
+	if ((flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) != 0) {
+		return EINVAL;
+	}
+	if (!clock_ok) {
+		return EINVAL;
+	}
+
+	const int error = fd_allocfile(&fp, &fd);
+	if (error != 0) {
+		return error;
+	}
+
+	fp->f_flag = (flags & TFD_NONBLOCK) != 0 ? (FREAD | FNONBLOCK) : FREAD;
+	fp->f_type = DTYPE_TIMERFD;
+	fp->f_ops = &timerfd_fileops;
+	fp->f_timerfd = timerfd_create(clock_id, flags);
+
+	/* Record close-on-exec, then make the descriptor visible. */
+	fd_set_exclose(l, fd, (flags & TFD_CLOEXEC) != 0);
+	fd_affix(curproc, fp, fd);
+
+	*retval = fd;
+	return 0;
+}
+
+/*
+ * Syscall entry point for timerfd_create(2): unpack the arguments and
+ * hand them to do_timerfd_create().
+ */
+int
+sys_timerfd_create(struct lwp *l, const struct sys_timerfd_create_args *uap,
+    register_t *retval)
+{
+	/* {
+		syscallarg(clockid_t) clock_id;
+		syscallarg(int) flags;
+	} */
+	const clockid_t which_clock = SCARG(uap, clock_id);
+	const int tfd_flags = SCARG(uap, flags);
+
+	return do_timerfd_create(l, which_clock, tfd_flags, retval);
+}
+
+/*
+ * timerfd_gettime(2) system call.
+ *
+ *	Fill *curr_value (a kernel buffer) with the timer's current
+ *	setting as reported by itimer_gettime().
+ *
+ *	Returns EBADF if fd is not an open descriptor and EINVAL if it
+ *	is not a timerfd.
+ */
+int
+do_timerfd_gettime(struct lwp *l, int fd, struct itimerspec *curr_value,
+    register_t *retval)
+{
+	file_t * const fp = fd_getfile(fd);
+	int error = 0;
+
+	if (fp == NULL) {
+		return EBADF;
+	}
+
+	if (fp->f_ops == &timerfd_fileops) {
+		struct timerfd * const tfd = fp->f_timerfd;
+
+		itimer_lock();
+		itimer_gettime(&tfd->tfd_itimer, curr_value);
+		itimer_unlock();
+	} else {
+		/* Some other kind of file. */
+		error = EINVAL;
+	}
+
+	fd_putfile(fd);
+	return error;
+}
+
+/*
+ * Syscall entry point for timerfd_gettime(2): fetch the setting into a
+ * kernel buffer and copy it out to the user's curr_value.
+ */
+int
+sys_timerfd_gettime(struct lwp *l, const struct sys_timerfd_gettime_args *uap,
+    register_t *retval)
+{
+	/* {
+		syscallarg(int) fd;
+		syscallarg(struct itimerspec *) curr_value;
+	} */
+
+	struct itimerspec oits;
+
+	const int error = do_timerfd_gettime(l, SCARG(uap, fd), &oits, retval);
+	if (error != 0) {
+		return error;
+	}
+
+	return copyout(&oits, SCARG(uap, curr_value), sizeof(oits));
+}
+
+/*
+ * timerfd_timespec_valid:
+ *
+ *	True if ts has a non-negative tv_sec and tv_nsec within
+ *	[0, 999999999].
+ */
+static inline bool
+timerfd_timespec_valid(const struct timespec * const ts)
+{
+	return ts->tv_sec >= 0 &&
+	    ts->tv_nsec >= 0 && ts->tv_nsec < 1000000000L;
+}
+
+/*
+ * timerfd_settime(2) system call.
+ *
+ *	Arm or disarm the timer behind descriptor `fd' according to
+ *	*new_value and, when old_value is not NULL, hand back the
+ *	it_time setting that was in effect on entry.  Both are kernel
+ *	buffers.  A relative it_value (TFD_TIMER_ABSTIME clear) is
+ *	converted to an absolute time on the timer's clock before
+ *	itimer_settime() is called.  On success the expirations counter
+ *	is reset and, for CLOCK_REALTIME timers, the cancel state is
+ *	re-initialized from TFD_TIMER_CANCEL_ON_SET.
+ *
+ *	Returns EINVAL for unknown flags, for a new_value whose fields
+ *	are out of range, or for a descriptor that is not a timerfd;
+ *	EBADF if fd is not an open descriptor; 0 otherwise.
+ *
+ *	NOTE(review): old_value is the raw stored it_time, not the
+ *	"time remaining" form that do_timerfd_gettime() obtains from
+ *	itimer_gettime() -- confirm which one callers expect.
+ */
+int
+do_timerfd_settime(struct lwp *l, int fd, int flags,
+    const struct itimerspec *new_value, struct itimerspec *old_value,
+    register_t *retval)
+{
+	file_t *fp;
+	int error;
+
+	if (flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) {
+		return EINVAL;
+	}
+
+	/*
+	 * new_value comes from userland; refuse malformed timespecs
+	 * before they reach timespecadd() or the itimer code.
+	 */
+	if (!timerfd_timespec_valid(&new_value->it_value) ||
+	    !timerfd_timespec_valid(&new_value->it_interval)) {
+		return EINVAL;
+	}
+
+	if ((fp = fd_getfile(fd)) == NULL) {
+		return EBADF;
+	}
+
+	if (fp->f_ops != &timerfd_fileops) {
+		fd_putfile(fd);
+		return EINVAL;
+	}
+
+	struct timerfd * const tfd = fp->f_timerfd;
+	struct itimer * const it = &tfd->tfd_itimer;
+
+	itimer_lock();
+
+	/*
+	 * Capture the previous setting exactly once.  Doing this inside
+	 * the restart loop would, on a retry, report the value we just
+	 * stored ourselves as the "old" one.
+	 */
+	if (old_value != NULL) {
+		*old_value = it->it_time;
+	}
+
+ restart:
+	it->it_time = *new_value;
+
+	/*
+	 * If we've been passed a relative value, convert it to an
+	 * absolute, as that's what the itimer facility expects for
+	 * non-virtual timers.  Also ensure that this doesn't set it
+	 * to zero or lets it go negative.
+	 * XXXJRT re-factor.
+	 */
+	if (timespecisset(&it->it_time.it_value) &&
+	    (flags & TFD_TIMER_ABSTIME) == 0) {
+		struct timespec now;
+		if (it->it_clockid == CLOCK_REALTIME) {
+			getnanotime(&now);
+		} else { /* CLOCK_MONOTONIC */
+			getnanouptime(&now);
+		}
+		timespecadd(&it->it_time.it_value, &now,
+		    &it->it_time.it_value);
+	}
+
+	/* ERESTART asks us to redo the whole setting from *new_value. */
+	error = itimer_settime(it);
+	if (error == ERESTART) {
+		goto restart;
+	}
+	KASSERT(error == 0);
+
+	/* Reset the expirations counter. */
+	it->it_overruns = 0;
+
+	if (it->it_clockid == CLOCK_REALTIME) {
+		tfd->tfd_cancelled = false;
+		tfd->tfd_cancel_on_set = !!(flags & TFD_TIMER_CANCEL_ON_SET);
+	}
+
+	/* Recorded for stat(2). */
+	getnanotime(&tfd->tfd_mtime);
+	itimer_unlock();
+
+	fd_putfile(fd);
+	return error;
+}
+
+/*
+ * Syscall entry point for timerfd_settime(2): copy in the new setting,
+ * apply it with do_timerfd_settime(), and copy the previous setting
+ * out if the caller supplied an old_value pointer.
+ */
+int
+sys_timerfd_settime(struct lwp *l, const struct sys_timerfd_settime_args *uap,
+    register_t *retval)
+{
+	/* {
+		syscallarg(int) fd;
+		syscallarg(int) flags;
+		syscallarg(const struct itimerspec *) new_value;
+		syscallarg(struct itimerspec *) old_value;
+	} */
+
+	struct itimerspec nits, oits;
+	/* Only collect the old setting when the caller wants it. */
+	struct itimerspec * const oitsp =
+	    SCARG(uap, old_value) != NULL ? &oits : NULL;
+	int error;
+
+	error = copyin(SCARG(uap, new_value), &nits, sizeof(nits));
+	if (error) {
+		return error;
+	}
+
+	error = do_timerfd_settime(l, SCARG(uap, fd), SCARG(uap, flags),
+	    &nits, oitsp, retval);
+	if (error != 0 || oitsp == NULL) {
+		return error;
+	}
+
+	return copyout(oitsp, SCARG(uap, old_value), sizeof(*oitsp));
+}

Index: src/sys/sys/eventfd.h
diff -u /dev/null src/sys/sys/eventfd.h:1.2
--- /dev/null	Sun Sep 19 15:51:29 2021
+++ src/sys/sys/eventfd.h	Sun Sep 19 15:51:27 2021
@@ -0,0 +1,57 @@
+/*	$NetBSD: eventfd.h,v 1.2 2021/09/19 15:51:27 thorpej Exp $	*/
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_EVENTFD_H_
+#define	_SYS_EVENTFD_H_
+
+#include <sys/fcntl.h>
+
+/*
+ * Definitions for eventfd(2).  This implementation is API compatible
+ * with the Linux eventfd(2) interface.
+ */
+
+/* Type of the value exchanged by eventfd_read() and eventfd_write(). */
+typedef uint64_t eventfd_t;
+
+/*
+ * Flags accepted by eventfd(2).  The values reuse open(2) flag bits
+ * from <sys/fcntl.h>.
+ */
+#define	EFD_SEMAPHORE	O_RDWR
+#define	EFD_CLOEXEC	O_CLOEXEC
+#define	EFD_NONBLOCK	O_NONBLOCK
+
+#ifdef _KERNEL
+struct lwp;
+/* In-kernel implementation of eventfd(2): (lwp, val, flags, retval). */
+int	do_eventfd(struct lwp *, unsigned int, int, register_t *);
+#else /* ! _KERNEL */
+/* Userland interface. */
+int	eventfd(unsigned int, int);
+int	eventfd_read(int, eventfd_t *);
+int	eventfd_write(int, eventfd_t);
+#endif /* _KERNEL */
+
+#endif /* _SYS_EVENTFD_H_ */
Index: src/sys/sys/timerfd.h
diff -u /dev/null src/sys/sys/timerfd.h:1.2
--- /dev/null	Sun Sep 19 15:51:29 2021
+++ src/sys/sys/timerfd.h	Sun Sep 19 15:51:27 2021
@@ -0,0 +1,65 @@
+/*	$NetBSD: timerfd.h,v 1.2 2021/09/19 15:51:27 thorpej Exp $	*/
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_TIMERFD_H_
+#define	_SYS_TIMERFD_H_
+
+#include <sys/fcntl.h>
+#include <sys/ioccom.h>
+#include <sys/time.h>
+
+/*
+ * Definitions for timerfd_create(2) / timerfd_gettime(2) / timerfd_settime(2).
+ * This implementation is API compatible with the Linux interface.
+ */
+
+/*
+ * Flag values reuse open(2) flag bits from <sys/fcntl.h>.
+ * TFD_TIMER_* are accepted by timerfd_settime(2); TFD_CLOEXEC and
+ * TFD_NONBLOCK are accepted by timerfd_create(2).
+ */
+#define	TFD_TIMER_ABSTIME	O_WRONLY
+#define	TFD_TIMER_CANCEL_ON_SET	O_RDWR
+#define	TFD_CLOEXEC		O_CLOEXEC
+#define	TFD_NONBLOCK		O_NONBLOCK
+
+/* ioctl: set the expiration counter from a uint64_t (at most INT_MAX). */
+#define	TFD_IOC_SET_TICKS	_IOW('T', 0, uint64_t)
+
+#ifdef _KERNEL
+struct lwp;
+/* In-kernel implementations of the three system calls. */
+int	do_timerfd_create(struct lwp *, clockid_t, int, register_t *);
+int	do_timerfd_gettime(struct lwp *, int, struct itimerspec *,
+	    register_t *);
+int	do_timerfd_settime(struct lwp *, int, int, const struct itimerspec *,
+	    struct itimerspec *, register_t *);
+#else /* ! _KERNEL */
+/* Userland interface. */
+int	timerfd_create(clockid_t, int);
+int	timerfd_gettime(int, struct itimerspec *);
+int	timerfd_settime(int, int, const struct itimerspec *,
+	    struct itimerspec *);
+#endif /* _KERNEL */
+
+#endif /* _SYS_TIMERFD_H_ */

Index: src/tests/lib/libc/sys/t_eventfd.c
diff -u /dev/null src/tests/lib/libc/sys/t_eventfd.c:1.2
--- /dev/null	Sun Sep 19 15:51:29 2021
+++ src/tests/lib/libc/sys/t_eventfd.c	Sun Sep 19 15:51:28 2021
@@ -0,0 +1,790 @@
+/* $NetBSD: t_eventfd.c,v 1.2 2021/09/19 15:51:28 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__COPYRIGHT("@(#) Copyright (c) 2020\
+ The NetBSD Foundation, inc. All rights reserved.");
+__RCSID("$NetBSD: t_eventfd.c,v 1.2 2021/09/19 15:51:28 thorpej Exp $");
+
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/eventfd.h>
+#include <sys/select.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <errno.h>
+#include <poll.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <atf-c.h>
+
+/*
+ * State shared between a test's main thread and its helper thread.
+ * `state' is read and written under `mutex' and changes are announced
+ * on `cond'; `barrier' is initialized for two participants.
+ */
+struct helper_context {
+	int	efd;		/* eventfd descriptor under test */
+
+	pthread_mutex_t mutex;	/* protects state */
+	pthread_cond_t cond;	/* signalled by set_state() */
+	pthread_barrier_t barrier; /* main/helper rendezvous */
+	int	state;		/* test-defined progress marker */
+};
+
+/*
+ * Zero a helper context and set up its synchronization objects: a
+ * default mutex, a condition variable timed against CLOCK_MONOTONIC
+ * (to match the deadline computed in wait_state()), and a two-party
+ * barrier.
+ */
+static void
+init_helper_context(struct helper_context * const ctx)
+{
+	pthread_condattr_t condattr;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	ATF_REQUIRE(pthread_mutex_init(&ctx->mutex, NULL) == 0);
+
+	/* The attribute object is only needed while creating the condvar. */
+	ATF_REQUIRE(pthread_condattr_init(&condattr) == 0);
+	ATF_REQUIRE(pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) == 0);
+	ATF_REQUIRE(pthread_cond_init(&ctx->cond, &condattr) == 0);
+	ATF_REQUIRE(pthread_condattr_destroy(&condattr) == 0);
+
+	/* Exactly two parties: the main thread and one helper. */
+	ATF_REQUIRE(pthread_barrier_init(&ctx->barrier, NULL, 2) == 0);
+}
+
+/*
+ * Publish a new state value and signal the condition variable so a
+ * thread sleeping in wait_state() re-examines it.
+ */
+static void
+set_state(struct helper_context * const ctx, int const new)
+{
+	pthread_mutex_t * const lock = &ctx->mutex;
+
+	pthread_mutex_lock(lock);
+	ctx->state = new;
+	pthread_cond_signal(&ctx->cond);
+	pthread_mutex_unlock(lock);
+}
+
+/*
+ * Return a snapshot of the shared state, read under the mutex.
+ */
+static int
+get_state(struct helper_context * const ctx)
+{
+	pthread_mutex_t * const lock = &ctx->mutex;
+	int snapshot;
+
+	pthread_mutex_lock(lock);
+	snapshot = ctx->state;
+	pthread_mutex_unlock(lock);
+
+	return snapshot;
+}
+
+/*
+ * Wait up to five seconds (on CLOCK_MONOTONIC) for the shared state to
+ * equal `val'.  Returns true if it does when we stop waiting, false if
+ * the wait timed out or failed first.
+ */
+static bool
+wait_state(struct helper_context * const ctx, int const val)
+{
+	struct timespec deadline;
+	bool reached;
+
+	pthread_mutex_lock(&ctx->mutex);
+
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &deadline) == 0);
+	deadline.tv_sec += 5;
+
+	for (;;) {
+		if (ctx->state == val) {
+			break;
+		}
+		/* Any error, including a timeout, ends the wait. */
+		if (pthread_cond_timedwait(&ctx->cond, &ctx->mutex,
+		    &deadline) != 0) {
+			break;
+		}
+	}
+	reached = (ctx->state == val);
+
+	pthread_mutex_unlock(&ctx->mutex);
+
+	return reached;
+}
+
+/*
+ * Rendezvous with the other thread at the barrier.  Returns true for
+ * either successful return value of pthread_barrier_wait().
+ */
+static bool
+wait_barrier(struct helper_context * const ctx)
+{
+	const int rc = pthread_barrier_wait(&ctx->barrier);
+
+	if (rc == PTHREAD_BARRIER_SERIAL_THREAD) {
+		return true;
+	}
+	return rc == 0;
+}
+
+/*****************************************************************************/
+
+/*
+ * Helper thread for the eventfd_normal test.  It performs two reads on
+ * ctx->efd: the first is issued while the main thread holds off its
+ * write for a couple of seconds, the second after the main thread has
+ * performed two writes whose sum is expected.  Barriers and the shared
+ * state variable keep the two threads in lock-step.
+ */
+static void *
+eventfd_normal_helper(void * const v)
+{
+	struct helper_context * const ctx = v;
+	eventfd_t efd_value;
+
+	/* Rendezvous #1: main has already set the state to 666. */
+	ATF_REQUIRE(wait_barrier(ctx));
+
+	/* Read the value.  This will reset it to zero. */
+	ATF_REQUIRE(get_state(ctx) == 666);
+	ATF_REQUIRE(eventfd_read(ctx->efd, &efd_value) == 0);
+
+	/* Assert the value. */
+	ATF_REQUIRE(efd_value == 0xcafebabe);
+
+	/* Tell the main thread the first read has completed. */
+	set_state(ctx, 0);
+
+	/* Wait for the main thread to prep the next test. */
+	ATF_REQUIRE(wait_barrier(ctx));
+
+	/* Read the value. */
+	ATF_REQUIRE(eventfd_read(ctx->efd, &efd_value) == 0);
+
+	/* Assert the value. */
+	ATF_REQUIRE(efd_value == 0xbeefcafe);
+
+	/* Final rendezvous: let the main thread know we are done. */
+	ATF_REQUIRE(wait_barrier(ctx));
+
+	return NULL;
+}
+
+/*
+ * eventfd_normal: a blocking (non-semaphore) eventfd.  Checks that a
+ * read on an empty eventfd does not complete until a value is written,
+ * that the read returns the written value, and that successive writes
+ * accumulate.
+ */
+ATF_TC(eventfd_normal);
+ATF_TC_HEAD(eventfd_normal, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "validates basic normal eventfd operation");
+}
+ATF_TC_BODY(eventfd_normal, tc)
+{
+	struct helper_context ctx;
+	pthread_t helper;
+	void *join_val;
+
+	init_helper_context(&ctx);
+
+	ATF_REQUIRE((ctx.efd = eventfd(0, 0)) >= 0);
+
+	ATF_REQUIRE(pthread_create(&helper, NULL,
+				   eventfd_normal_helper, &ctx) == 0);
+
+	/*
+	 * Wait for the helper to block in read().  Give it some time
+	 * so that if the read fails or returns immediately, we'll
+	 * notice.
+	 */
+	set_state(&ctx, 666);
+	ATF_REQUIRE(wait_barrier(&ctx));
+	sleep(2);
+	/* The helper only clears the state after its read returns. */
+	ATF_REQUIRE(get_state(&ctx) == 666);
+
+	/* Write a distinct value; helper will assert it. */
+	ATF_REQUIRE(eventfd_write(ctx.efd, 0xcafebabe) == 0);
+
+	/* Wait for helper to read the value. */
+	ATF_REQUIRE(wait_state(&ctx, 0));
+
+	/* Helper is now blocked in a barrier. */
+
+	/* Test additive property of the efd value. */
+	ATF_REQUIRE(eventfd_write(ctx.efd, 0x0000cafe) == 0);
+	ATF_REQUIRE(eventfd_write(ctx.efd, 0xbeef0000) == 0);
+
+	/* Satisfy the barrier; helper will read value and assert 0xbeefcafe. */
+	ATF_REQUIRE(wait_barrier(&ctx));
+
+	/* And wait for it to finish. */
+	ATF_REQUIRE(wait_barrier(&ctx));
+
+	/* Reap the helper. */
+	ATF_REQUIRE(pthread_join(helper, &join_val) == 0);
+
+	(void) close(ctx.efd);
+}
+
+/*****************************************************************************/
+
+/*
+ * eventfd_semaphore: a non-blocking eventfd in semaphore mode, created
+ * with an initial value of 3.  Each successful read is expected to
+ * return 1, and a read with nothing left is expected to fail with
+ * EAGAIN.
+ */
+ATF_TC(eventfd_semaphore);
+ATF_TC_HEAD(eventfd_semaphore, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "validates semaphore and non-blocking eventfd operation");
+}
+ATF_TC_BODY(eventfd_semaphore, tc)
+{
+	eventfd_t efd_value;
+	int efd;
+
+	ATF_REQUIRE((efd = eventfd(3, EFD_SEMAPHORE | EFD_NONBLOCK)) >= 0);
+
+	/* 3 reads should succeed without blocking. */
+	ATF_REQUIRE(eventfd_read(efd, &efd_value) == 0);
+	ATF_REQUIRE(efd_value == 1);
+
+	ATF_REQUIRE(eventfd_read(efd, &efd_value) == 0);
+	ATF_REQUIRE(efd_value == 1);
+
+	ATF_REQUIRE(eventfd_read(efd, &efd_value) == 0);
+	ATF_REQUIRE(efd_value == 1);
+
+	/*
+	 * This one would block; since the descriptor is non-blocking
+	 * it must fail with EAGAIN instead.
+	 */
+	ATF_REQUIRE_ERRNO(EAGAIN,
+	    eventfd_read(efd, &efd_value) == -1);
+
+	/* Add 1 to the semaphore. */
+	ATF_REQUIRE(eventfd_write(efd, 1) == 0);
+
+	/* One more read allowed. */
+	ATF_REQUIRE(eventfd_read(efd, &efd_value) == 0);
+	ATF_REQUIRE(efd_value == 1);
+
+	/* And this one again would block, so EAGAIN is expected. */
+	ATF_REQUIRE_ERRNO(EAGAIN,
+	    eventfd_read(efd, &efd_value) == -1);
+
+	(void) close(efd);
+}
+
+/*****************************************************************************/
+
+/*
+ * eventfd_select_poll_kevent_immed: readiness reporting with zero
+ * timeouts.  The same two situations are checked through poll(),
+ * select() and kevent(): an eventfd holding 0 (expected writable
+ * only) and one holding UINT64_MAX - 1 (expected readable only).
+ */
+ATF_TC(eventfd_select_poll_kevent_immed);
+ATF_TC_HEAD(eventfd_select_poll_kevent_immed, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "validates select/poll/kevent behavior - immediate return");
+}
+ATF_TC_BODY(eventfd_select_poll_kevent_immed, tc)
+{
+	const struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 };
+	struct timeval tv;
+	struct pollfd fds[1];
+	fd_set readfds, writefds, exceptfds;
+	int efd;
+	int kq;
+	struct kevent kev[2];
+
+	ATF_REQUIRE((efd = eventfd(0, EFD_NONBLOCK)) >= 0);
+
+	/* Register both a read and a write filter on the eventfd. */
+	ATF_REQUIRE((kq = kqueue()) >= 0);
+	EV_SET(&kev[0], efd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+	EV_SET(&kev[1], efd, EVFILT_WRITE, EV_ADD, 0, 0, NULL);
+	ATF_REQUIRE(kevent(kq, kev, 2, NULL, 0, &ts) == 0);
+
+	/*
+	 * efd should be writable but not readable.  Pass all of the
+	 * event bits; we should only get back POLLOUT | POLLWRNORM.
+	 */
+	fds[0].fd = efd;
+	fds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI |
+	    POLLOUT | POLLWRNORM | POLLWRBAND | POLLHUP;
+	fds[0].revents = 0;
+	ATF_REQUIRE(poll(fds, 1, 0) == 1);
+	ATF_REQUIRE(fds[0].revents == (POLLOUT | POLLWRNORM));
+
+	/*
+	 * As above; efd should only be set in writefds upon return
+	 * from the select() call.
+	 */
+	FD_ZERO(&readfds);
+	FD_ZERO(&writefds);
+	FD_ZERO(&exceptfds);
+	tv.tv_sec = 0;
+	tv.tv_usec = 0;
+	FD_SET(efd, &readfds);
+	FD_SET(efd, &writefds);
+	FD_SET(efd, &exceptfds);
+	ATF_REQUIRE(select(efd + 1, &readfds, &writefds, &exceptfds, &tv) == 1);
+	ATF_REQUIRE(!FD_ISSET(efd, &readfds));
+	ATF_REQUIRE(FD_ISSET(efd, &writefds));
+	ATF_REQUIRE(!FD_ISSET(efd, &exceptfds));
+
+	/*
+	 * Check that we get an EVFILT_WRITE event (and only that event)
+	 * on efd.
+	 */
+	memset(kev, 0, sizeof(kev));
+	ATF_REQUIRE(kevent(kq, NULL, 0, kev, 2, &ts) == 1);
+	ATF_REQUIRE(kev[0].ident == (uintptr_t)efd);
+	ATF_REQUIRE(kev[0].filter == EVFILT_WRITE);
+	ATF_REQUIRE((kev[0].flags & (EV_EOF | EV_ERROR)) == 0);
+	ATF_REQUIRE(kev[0].data == 0);
+
+	/*
+	 * Write the maximum value into the eventfd.  This should result
+	 * in the eventfd becoming readable but NOT writable.
+	 */
+	ATF_REQUIRE(eventfd_write(efd, UINT64_MAX - 1) == 0);
+
+	/* Same three checks again, now expecting read-only readiness. */
+	fds[0].fd = efd;
+	fds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI |
+	    POLLOUT | POLLWRNORM | POLLWRBAND | POLLHUP;
+	fds[0].revents = 0;
+	ATF_REQUIRE(poll(fds, 1, 0) == 1);
+	ATF_REQUIRE(fds[0].revents == (POLLIN | POLLRDNORM));
+
+	FD_ZERO(&readfds);
+	FD_ZERO(&writefds);
+	FD_ZERO(&exceptfds);
+	tv.tv_sec = 0;
+	tv.tv_usec = 0;
+	FD_SET(efd, &readfds);
+	FD_SET(efd, &writefds);
+	FD_SET(efd, &exceptfds);
+	ATF_REQUIRE(select(efd + 1, &readfds, &writefds, &exceptfds, &tv) == 1);
+	ATF_REQUIRE(FD_ISSET(efd, &readfds));
+	ATF_REQUIRE(!FD_ISSET(efd, &writefds));
+	ATF_REQUIRE(!FD_ISSET(efd, &exceptfds));
+
+	/*
+	 * Check that we get an EVFILT_READ event (and only that event)
+	 * on efd.
+	 */
+	memset(kev, 0, sizeof(kev));
+	ATF_REQUIRE(kevent(kq, NULL, 0, kev, 2, &ts) == 1);
+	ATF_REQUIRE(kev[0].ident == (uintptr_t)efd);
+	ATF_REQUIRE(kev[0].filter == EVFILT_READ);
+	ATF_REQUIRE((kev[0].flags & (EV_EOF | EV_ERROR)) == 0);
+	/* The read filter's data is expected to carry the counter value. */
+	ATF_REQUIRE(kev[0].data == (int64_t)(UINT64_MAX - 1));
+
+	(void) close(kq);
+	(void) close(efd);
+}
+
+/*****************************************************************************/
+
+static void *
+eventfd_select_poll_kevent_block_helper(void * const v)
+{	/* Helper thread: blocks in poll(), select(), then kevent() on ctx->efd. */
+	struct helper_context * const ctx = v;
+	struct pollfd fds[1];
+	fd_set selfds;
+	eventfd_t efd_value;
+	int kq;
+	struct kevent kev[1];
+
+	fds[0].fd = ctx->efd;
+	fds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
+	fds[0].revents = 0;
+
+	ATF_REQUIRE_ERRNO(EAGAIN,
+	    eventfd_read(ctx->efd, &efd_value) == -1);	/* nothing to read yet */
+
+	ATF_REQUIRE(wait_barrier(ctx));	/* rendezvous with the main thread */
+
+	ATF_REQUIRE(get_state(ctx) == 666);	/* main thread stored 666 before the barrier */
+	ATF_REQUIRE(poll(fds, 1, INFTIM) == 1);	/* no timeout: returns only once readable */
+	ATF_REQUIRE(fds[0].revents == (POLLIN | POLLRDNORM));
+	set_state(ctx, 0);	/* main thread checks for 0 after the next barrier */
+
+	ATF_REQUIRE(wait_barrier(ctx));
+
+	/*
+	 * The maximum value was written to the eventfd, so we
+	 * should block waiting for writability.
+	 */
+	fds[0].fd = ctx->efd;
+	fds[0].events = POLLOUT | POLLWRNORM;
+	fds[0].revents = 0;
+
+	ATF_REQUIRE_ERRNO(EAGAIN,
+	    eventfd_write(ctx->efd, UINT64_MAX - 1) == -1);	/* counter is full */
+
+	ATF_REQUIRE(wait_barrier(ctx));
+
+	ATF_REQUIRE(get_state(ctx) == 666);
+	ATF_REQUIRE(poll(fds, 1, INFTIM) == 1);	/* returns once the main thread reads */
+	ATF_REQUIRE(fds[0].revents == (POLLOUT | POLLWRNORM));
+	set_state(ctx, 0);
+
+	ATF_REQUIRE(wait_barrier(ctx));
+
+	/*
+	 * Now, the same dance again, with select().
+	 */
+
+	FD_ZERO(&selfds);
+	FD_SET(ctx->efd, &selfds);
+
+	ATF_REQUIRE_ERRNO(EAGAIN,
+	    eventfd_read(ctx->efd, &efd_value) == -1);	/* drained by the main thread */
+
+	ATF_REQUIRE(wait_barrier(ctx));
+
+	ATF_REQUIRE(get_state(ctx) == 666);
+	ATF_REQUIRE(select(ctx->efd + 1, &selfds, NULL, NULL, NULL) == 1);	/* selfds used as the read set */
+	ATF_REQUIRE(FD_ISSET(ctx->efd, &selfds));
+	set_state(ctx, 0);
+
+	ATF_REQUIRE(wait_barrier(ctx));
+
+	FD_ZERO(&selfds);
+	FD_SET(ctx->efd, &selfds);
+
+	ATF_REQUIRE_ERRNO(EAGAIN,
+	    eventfd_write(ctx->efd, UINT64_MAX - 1) == -1);	/* counter is full again */
+
+	ATF_REQUIRE(wait_barrier(ctx));
+
+	ATF_REQUIRE(get_state(ctx) == 666);
+	ATF_REQUIRE(select(ctx->efd + 1, NULL, &selfds, NULL, NULL) == 1);	/* selfds used as the write set */
+	ATF_REQUIRE(FD_ISSET(ctx->efd, &selfds));
+	set_state(ctx, 0);
+
+	ATF_REQUIRE(wait_barrier(ctx));
+
+	/*
+	 * Now, the same dance again, with kevent().
+	 */
+	ATF_REQUIRE((kq = kqueue()) >= 0);
+
+	EV_SET(&kev[0], ctx->efd, EVFILT_READ, EV_ADD | EV_ONESHOT, 0, 0, NULL);	/* one-shot read filter */
+	ATF_REQUIRE(kevent(kq, kev, 1, NULL, 0, NULL) == 0);	/* register only, fetch no events */
+
+	ATF_REQUIRE_ERRNO(EAGAIN,
+	    eventfd_read(ctx->efd, &efd_value) == -1);
+
+	ATF_REQUIRE(wait_barrier(ctx));
+
+	ATF_REQUIRE(get_state(ctx) == 666);
+	ATF_REQUIRE(kevent(kq, NULL, 0, kev, 1, NULL) == 1);	/* NULL timeout: wait for the event */
+	ATF_REQUIRE(kev[0].ident == (uintptr_t)ctx->efd);
+	ATF_REQUIRE(kev[0].filter == EVFILT_READ);
+	ATF_REQUIRE((kev[0].flags & (EV_EOF | EV_ERROR)) == 0);
+	ATF_REQUIRE(kev[0].data == (int64_t)(UINT64_MAX - 1));	/* the value the main thread wrote */
+	set_state(ctx, 0);
+
+	ATF_REQUIRE(wait_barrier(ctx));
+
+	EV_SET(&kev[0], ctx->efd, EVFILT_WRITE, EV_ADD | EV_ONESHOT, 0, 0,
+	       NULL);	/* one-shot write filter */
+	ATF_REQUIRE(kevent(kq, kev, 1, NULL, 0, NULL) == 0);
+
+	ATF_REQUIRE_ERRNO(EAGAIN,
+	    eventfd_write(ctx->efd, UINT64_MAX - 1) == -1);
+
+	ATF_REQUIRE(wait_barrier(ctx));
+
+	ATF_REQUIRE(get_state(ctx) == 666);
+	ATF_REQUIRE(kevent(kq, NULL, 0, kev, 1, NULL) == 1);
+	ATF_REQUIRE(kev[0].ident == (uintptr_t)ctx->efd);
+	ATF_REQUIRE(kev[0].filter == EVFILT_WRITE);
+	ATF_REQUIRE((kev[0].flags & (EV_EOF | EV_ERROR)) == 0);
+	ATF_REQUIRE(kev[0].data == 0);
+	set_state(ctx, 0);
+
+	ATF_REQUIRE(wait_barrier(ctx));	/* final rendezvous before exiting */
+
+	(void) close(kq);
+
+	return NULL;
+}
+
+ATF_TC(eventfd_select_poll_kevent_block);
+ATF_TC_HEAD(eventfd_select_poll_kevent_block, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "validates select/poll/kevent behavior - return after blocking");
+}
+ATF_TC_BODY(eventfd_select_poll_kevent_block, tc)
+{	/* Main-thread side: lets the helper block, then wakes it by write/read. */
+	struct helper_context ctx;
+	pthread_t helper;
+	eventfd_t efd_value;
+	void *join_val;
+
+	init_helper_context(&ctx);
+
+	ATF_REQUIRE((ctx.efd = eventfd(0, EFD_NONBLOCK)) >= 0);
+
+	ATF_REQUIRE(pthread_create(&helper, NULL,
+				   eventfd_select_poll_kevent_block_helper,
+				   &ctx) == 0);
+
+	/*
+	 * Wait for the helper to block in poll().  Give it some time
+	 * so that if the poll returns immediately, we'll notice.
+	 */
+	set_state(&ctx, 666);	/* helper resets this to 0 once its wait returns */
+	ATF_REQUIRE(wait_barrier(&ctx));
+	sleep(2);
+	ATF_REQUIRE(get_state(&ctx) == 666);	/* still 666: helper has not woken */
+
+	/*
+	 * Write the max value to the eventfd so that it becomes readable
+	 * and unblocks the helper waiting in poll().
+	 */
+	ATF_REQUIRE(eventfd_write(ctx.efd, UINT64_MAX - 1) == 0);
+
+	/*
+	 * Ensure the helper woke from the poll() call.
+	 */
+	ATF_REQUIRE(wait_barrier(&ctx));
+	ATF_REQUIRE(get_state(&ctx) == 0);
+
+	/*
+	 * Wait for the helper to block in poll(), this time waiting
+	 * for writability.
+	 */
+	set_state(&ctx, 666);
+	ATF_REQUIRE(wait_barrier(&ctx));
+	sleep(2);
+	ATF_REQUIRE(get_state(&ctx) == 666);
+
+	/*
+	 * Now read the value, which will reset the eventfd to 0 and
+	 * unblock the poll() call.
+	 */
+	ATF_REQUIRE(eventfd_read(ctx.efd, &efd_value) == 0);
+	ATF_REQUIRE(efd_value == UINT64_MAX - 1);
+
+	/*
+	 * Ensure that the helper woke from the poll() call.
+	 */
+	ATF_REQUIRE(wait_barrier(&ctx));
+	ATF_REQUIRE(get_state(&ctx) == 0);
+
+	/*
+	 * Wait for the helper to block in select(), waiting for readability.
+	 */
+	set_state(&ctx, 666);
+	ATF_REQUIRE(wait_barrier(&ctx));
+	sleep(2);
+	ATF_REQUIRE(get_state(&ctx) == 666);
+
+	/*
+	 * Write the max value to the eventfd so that it becomes readable
+	 * and unblocks the helper waiting in select().
+	 */
+	efd_value = UINT64_MAX - 1;	/* largest value a single write accepts here */
+	ATF_REQUIRE(eventfd_write(ctx.efd, efd_value) == 0);
+
+	/*
+	 * Ensure the helper woke from the select() call.
+	 */
+	ATF_REQUIRE(wait_barrier(&ctx));
+	ATF_REQUIRE(get_state(&ctx) == 0);
+
+	/*
+	 * Wait for the helper to block in select(), this time waiting
+	 * for writability.
+	 */
+	set_state(&ctx, 666);
+	ATF_REQUIRE(wait_barrier(&ctx));
+	sleep(2);
+	ATF_REQUIRE(get_state(&ctx) == 666);
+
+	/*
+	 * Now read the value, which will reset the eventfd to 0 and
+	 * unblock the select() call.
+	 */
+	ATF_REQUIRE(eventfd_read(ctx.efd, &efd_value) == 0);
+	ATF_REQUIRE(efd_value == UINT64_MAX - 1);
+
+	/*
+	 * Ensure that the helper woke from the select() call.
+	 */
+	ATF_REQUIRE(wait_barrier(&ctx));
+	ATF_REQUIRE(get_state(&ctx) == 0);
+
+	/*
+	 * Wait for the helper to block in kevent(), waiting for readability.
+	 */
+	set_state(&ctx, 666);
+	ATF_REQUIRE(wait_barrier(&ctx));
+	sleep(2);
+	ATF_REQUIRE(get_state(&ctx) == 666);
+
+	/*
+	 * Write the max value to the eventfd so that it becomes readable
+	 * and unblocks the helper waiting in kevent().
+	 */
+	efd_value = UINT64_MAX - 1;
+	ATF_REQUIRE(eventfd_write(ctx.efd, efd_value) == 0);
+
+	/*
+	 * Ensure the helper woke from the kevent() call.
+	 */
+	ATF_REQUIRE(wait_barrier(&ctx));
+	ATF_REQUIRE(get_state(&ctx) == 0);
+
+	/*
+	 * Wait for the helper to block in kevent(), this time waiting
+	 * for writability.
+	 */
+	set_state(&ctx, 666);
+	ATF_REQUIRE(wait_barrier(&ctx));
+	sleep(2);
+	ATF_REQUIRE(get_state(&ctx) == 666);
+
+	/*
+	 * Now read the value, which will reset the eventfd to 0 and
+	 * unblock the kevent() call.
+	 */
+	ATF_REQUIRE(eventfd_read(ctx.efd, &efd_value) == 0);
+	ATF_REQUIRE(efd_value == UINT64_MAX - 1);
+
+	/*
+	 * Ensure that the helper woke from the kevent() call.
+	 */
+	ATF_REQUIRE(wait_barrier(&ctx));
+	ATF_REQUIRE(get_state(&ctx) == 0);
+
+	/* Reap the helper. */
+	ATF_REQUIRE(pthread_join(helper, &join_val) == 0);
+
+	(void) close(ctx.efd);
+}
+
+/*****************************************************************************/
+
+static void *
+eventfd_restart_helper(void * const v)
+{	/* Helper thread: blocks in eventfd_read() until the main thread closes the fd. */
+	struct helper_context * const ctx = v;
+	eventfd_t efd_value;
+
+	/*
+	 * Issue a single read to ensure that the descriptor is valid.
+	 * This will not block because it was created with an initial
+	 * count of 1.
+	 */
+	ATF_REQUIRE(eventfd_read(ctx->efd, &efd_value) == 0);
+	ATF_REQUIRE(efd_value == 1);
+
+	ATF_REQUIRE(wait_barrier(ctx));	/* rendezvous with the main thread */
+
+	/*
+	 * Block in read.  The main thread will close the descriptor,
+	 * which should unblock us and result in EBADF.
+	 */
+	ATF_REQUIRE(get_state(ctx) == 666);	/* main thread stored 666 before the barrier */
+	ATF_REQUIRE_ERRNO(EBADF, eventfd_read(ctx->efd, &efd_value) == -1);
+	set_state(ctx, 0);	/* main thread checks for 0 after the next barrier */
+
+	ATF_REQUIRE(wait_barrier(ctx));
+
+	return NULL;
+}
+
+ATF_TC(eventfd_restart);
+ATF_TC_HEAD(eventfd_restart, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "exercises the 'restart' fileop code path");
+}
+ATF_TC_BODY(eventfd_restart, tc)
+{	/* Closes an eventfd while the helper thread is blocked reading it. */
+	struct helper_context ctx;
+	pthread_t helper;
+	void *join_val;
+
+	init_helper_context(&ctx);
+
+	ATF_REQUIRE((ctx.efd = eventfd(1, 0)) >= 0);	/* initial count 1, no flags (blocking) */
+
+	ATF_REQUIRE(pthread_create(&helper, NULL,
+				   eventfd_restart_helper, &ctx) == 0);
+
+	/*
+	 * Wait for the helper to block in read().  Give it some time
+	 * so that if the read returns immediately, we'll notice.
+	 */
+	set_state(&ctx, 666);	/* helper resets this to 0 once its read returns */
+	ATF_REQUIRE(wait_barrier(&ctx));
+	sleep(2);
+	ATF_REQUIRE(get_state(&ctx) == 666);	/* still 666: helper has not woken */
+
+	/*
+	 * Close the descriptor.  This should unblock the reader,
+	 * and cause it to receive EBADF.
+	 */
+	ATF_REQUIRE(close(ctx.efd) == 0);
+
+	/*
+	 * Ensure that the helper woke from the read() call.
+	 */
+	ATF_REQUIRE(wait_barrier(&ctx));
+	ATF_REQUIRE(get_state(&ctx) == 0);
+
+	/* Reap the helper. */
+	ATF_REQUIRE(pthread_join(helper, &join_val) == 0);
+}
+
+/*****************************************************************************/
+
+ATF_TC(eventfd_badflags);
+ATF_TC_HEAD(eventfd_badflags, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "validates behavior when eventfd() called with bad flags");
+}
+ATF_TC_BODY(eventfd_badflags, tc)
+{	/* Passes every flag bit except the three named ones and expects EINVAL. */
+	ATF_REQUIRE_ERRNO(EINVAL,
+	    eventfd(0, ~(EFD_SEMAPHORE | EFD_CLOEXEC | EFD_NONBLOCK)) == -1);
+}
+
+/*****************************************************************************/
+
+ATF_TC(eventfd_bufsize);
+ATF_TC_HEAD(eventfd_bufsize, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "validates expected buffer size behavior");
+}
+ATF_TC_BODY(eventfd_bufsize, tc)
+{	/* Uses raw read()/write() with undersized and oversized buffers. */
+	eventfd_t efd_value[2];
+	int efd;
+
+	ATF_REQUIRE((efd = eventfd(1, EFD_NONBLOCK)) >= 0);
+
+	ATF_REQUIRE_ERRNO(EINVAL,
+	    read(efd, efd_value, sizeof(efd_value[0]) - 1) == -1);	/* one byte short of an eventfd_t */
+
+	efd_value[0] = 0xdeadbeef;	/* sentinels to detect what read() overwrites */
+	efd_value[1] = 0xdeadbeef;
+	ATF_REQUIRE(read(efd, efd_value, sizeof(efd_value)) ==
+	    sizeof(efd_value[0]));	/* two-element buffer, one element transferred */
+	ATF_REQUIRE(efd_value[0] == 1);
+	ATF_REQUIRE(efd_value[1] == 0xdeadbeef);	/* second element untouched */
+
+	ATF_REQUIRE_ERRNO(EINVAL,
+	    write(efd, efd_value, sizeof(efd_value[0]) - 1) == -1);	/* undersized write rejected too */
+	ATF_REQUIRE(write(efd, efd_value, sizeof(efd_value)) ==
+	    sizeof(efd_value[0]));	/* oversized write consumes only efd_value[0] (== 1) */
+
+	ATF_REQUIRE(read(efd, efd_value, sizeof(efd_value)) ==
+	    sizeof(efd_value[0]));
+	ATF_REQUIRE(efd_value[0] == 1);	/* only the first element was added to the counter */
+	ATF_REQUIRE(efd_value[1] == 0xdeadbeef);
+
+	(void) close(efd);
+}
+
+/*****************************************************************************/
+
+ATF_TP_ADD_TCS(tp)
+{	/* Registers the eventfd test cases with the test program tp. */
+	ATF_TP_ADD_TC(tp, eventfd_normal);
+	ATF_TP_ADD_TC(tp, eventfd_semaphore);
+	ATF_TP_ADD_TC(tp, eventfd_badflags);
+	ATF_TP_ADD_TC(tp, eventfd_bufsize);
+	ATF_TP_ADD_TC(tp, eventfd_select_poll_kevent_immed);
+	ATF_TP_ADD_TC(tp, eventfd_select_poll_kevent_block);
+	ATF_TP_ADD_TC(tp, eventfd_restart);
+
+	return atf_no_error();
+}
Index: src/tests/lib/libc/sys/t_timerfd.c
diff -u /dev/null src/tests/lib/libc/sys/t_timerfd.c:1.2
--- /dev/null	Sun Sep 19 15:51:29 2021
+++ src/tests/lib/libc/sys/t_timerfd.c	Sun Sep 19 15:51:28 2021
@@ -0,0 +1,602 @@
+/* $NetBSD: t_timerfd.c,v 1.2 2021/09/19 15:51:28 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__COPYRIGHT("@(#) Copyright (c) 2020\
+ The NetBSD Foundation, inc. All rights reserved.");
+__RCSID("$NetBSD: t_timerfd.c,v 1.2 2021/09/19 15:51:28 thorpej Exp $");
+
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/select.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/timerfd.h>
+#include <errno.h>
+#include <poll.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <atf-c.h>
+
+struct helper_context {	/* state shared between a test body and its helper thread */
+	int	fd;	/* descriptor under test (timerfd_restart's helper closes it) */
+
+	pthread_barrier_t barrier;	/* initialised for 2 waiters by init_helper_context() */
+};
+
+static void
+init_helper_context(struct helper_context * const ctx)
+{	/* Zeroes *ctx and sets up its barrier; fails the test on error. */
+
+	memset(ctx, 0, sizeof(*ctx));	/* NOTE(review): <string.h> is not included directly by this file */
+
+	ATF_REQUIRE(pthread_barrier_init(&ctx->barrier, NULL, 2) == 0);	/* 2 = test body + helper */
+}
+
+static bool
+wait_barrier(struct helper_context * const ctx)
+{
+	/* Wait on ctx->barrier; true for either non-error return value. */
+	const int status = pthread_barrier_wait(&ctx->barrier);
+	return status == PTHREAD_BARRIER_SERIAL_THREAD || status == 0;
+}
+
+/*****************************************************************************/
+
+static int
+timerfd_read(int fd, uint64_t *valp)
+{
+	uint64_t count;
+	const ssize_t nread = read(fd, &count, sizeof(count));
+
+	/* read(2) failed: pass -1 through, errno is left as read() set it. */
+	if (nread == -1)
+		return -1;
+
+	/* ?? Should never happen: anything but a whole counter is an error. */
+	if (nread != (ssize_t)sizeof(count)) {
+		errno = EIO;
+		return -1;
+	}
+	/* Complete read: only now is *valp written. */
+	*valp = count;
+	return 0;
+}
+
+/*****************************************************************************/
+
+ATF_TC(timerfd_create);
+ATF_TC_HEAD(timerfd_create, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "validates timerfd_create()");
+}
+ATF_TC_BODY(timerfd_create, tc)
+{	/* Two clocks must be accepted; two other clocks and bad flags must give EINVAL. */
+	int fd;
+
+	ATF_REQUIRE((fd = timerfd_create(CLOCK_REALTIME, 0)) >= 0);
+	(void) close(fd);
+
+	ATF_REQUIRE((fd = timerfd_create(CLOCK_MONOTONIC, 0)) >= 0);
+	(void) close(fd);
+
+	ATF_REQUIRE_ERRNO(EINVAL,
+	    (fd = timerfd_create(CLOCK_VIRTUAL, 0)) == -1);	/* clock expected to be rejected */
+
+	ATF_REQUIRE_ERRNO(EINVAL,
+	    (fd = timerfd_create(CLOCK_PROF, 0)) == -1);	/* clock expected to be rejected */
+
+	ATF_REQUIRE_ERRNO(EINVAL,
+	    (fd = timerfd_create(CLOCK_REALTIME,
+	    			    ~(TFD_CLOEXEC | TFD_NONBLOCK))) == -1);	/* every bit except the two named flags */
+}
+
+/*****************************************************************************/
+
+ATF_TC(timerfd_bogusfd);
+ATF_TC_HEAD(timerfd_bogusfd, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "validates rejection of bogus fds by timerfd_{get,set}time()");
+}
+ATF_TC_BODY(timerfd_bogusfd, tc)
+{	/* A valid descriptor that is not a timerfd must yield EINVAL. */
+	struct itimerspec its = { 0 };
+	int fd;
+
+	ATF_REQUIRE((fd = kqueue()) >= 0);	/* arbitrary fd type */
+
+	ATF_REQUIRE_ERRNO(EINVAL,
+	    timerfd_gettime(fd, &its) == -1);
+
+	its.it_value.tv_sec = 5;	/* non-zero value, i.e. a request to arm the timer */
+	ATF_REQUIRE_ERRNO(EINVAL,
+	    timerfd_settime(fd, 0, &its, NULL) == -1);
+
+	(void) close(fd);
+}
+
+/*****************************************************************************/
+
+ATF_TC(timerfd_block);
+ATF_TC_HEAD(timerfd_block, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "validates blocking behavior");
+}
+ATF_TC_BODY(timerfd_block, tc)
+{	/* Arms a 1-second one-shot timer and times a blocking read of it. */
+	struct timespec then, now, delta;
+	uint64_t val;
+	int fd;
+
+	ATF_REQUIRE((fd = timerfd_create(CLOCK_MONOTONIC, 0)) >= 0);	/* flags 0: TFD_NONBLOCK not set */
+
+	const struct itimerspec its = {
+		.it_value = { .tv_sec = 1, .tv_nsec = 0 },
+		.it_interval = { .tv_sec = 0, .tv_nsec = 0 },	/* zero interval: no repeat */
+	};
+
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &then) == 0);
+	ATF_REQUIRE(timerfd_settime(fd, 0, &its, NULL) == 0);
+	ATF_REQUIRE(timerfd_read(fd, &val) == 0);	/* expected to sleep until the timer fires */
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+	ATF_REQUIRE(val == 1);	/* exactly one expiration reported */
+
+	timespecsub(&now, &then, &delta);
+	ATF_REQUIRE(delta.tv_sec == 1);	/* elapsed time must be at least 1s and under 2s */
+
+	(void) close(fd);
+}
+
+/*****************************************************************************/
+
+ATF_TC(timerfd_repeating);
+ATF_TC_HEAD(timerfd_repeating, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "validates repeating timer behavior");
+}
+ATF_TC_BODY(timerfd_repeating, tc)
+{	/* Runs a 200ms periodic timer for about a second and counts expirations. */
+	struct timespec then, now, delta;
+	uint64_t val;
+	int fd;
+
+	ATF_REQUIRE((fd = timerfd_create(CLOCK_MONOTONIC,
+					    TFD_NONBLOCK)) >= 0);
+
+	const struct itimerspec its = {
+		.it_value = { .tv_sec = 0, .tv_nsec = 200000000 },	/* first expiry after 200ms */
+		.it_interval = { .tv_sec = 0, .tv_nsec = 200000000 },	/* then every 200ms */
+	};
+
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &then) == 0);
+	ATF_REQUIRE(timerfd_settime(fd, 0, &its, NULL) == 0);
+	ATF_REQUIRE(sleep(1) == 0);	/* 0 means the full second elapsed uninterrupted */
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+	ATF_REQUIRE(timerfd_read(fd, &val) == 0);	/* accumulated expiration count */
+	ATF_REQUIRE(val >= 3 && val <= 5);	/* allow some slop */
+
+	timespecsub(&now, &then, &delta);
+	ATF_REQUIRE(delta.tv_sec == 1);	/* elapsed time must be at least 1s and under 2s */
+
+	(void) close(fd);
+}
+
+/*****************************************************************************/
+
+ATF_TC(timerfd_abstime);
+ATF_TC_HEAD(timerfd_abstime, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "validates specifying abstime");
+}
+ATF_TC_BODY(timerfd_abstime, tc)
+{	/* Arms the timer with an absolute deadline one second past "then". */
+	struct timespec then, now, delta;
+	uint64_t val;
+	int fd;
+
+	ATF_REQUIRE((fd = timerfd_create(CLOCK_MONOTONIC, 0)) >= 0);
+
+	struct itimerspec its = {
+		.it_value = { .tv_sec = 0, .tv_nsec = 0 },	/* filled in below */
+		.it_interval = { .tv_sec = 0, .tv_nsec = 0 },
+	};
+
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &then) == 0);
+	its.it_value = then;
+	its.it_value.tv_sec += 1;	/* deadline = current monotonic time + 1s */
+	ATF_REQUIRE(timerfd_settime(fd, TFD_TIMER_ABSTIME, &its, NULL) == 0);
+	ATF_REQUIRE(timerfd_read(fd, &val) == 0);	/* blocking fd: waits for the deadline */
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+	ATF_REQUIRE(val == 1);
+
+	timespecsub(&now, &then, &delta);
+	ATF_REQUIRE(delta.tv_sec == 1);	/* elapsed time must be at least 1s and under 2s */
+
+	(void) close(fd);
+}
+
+/*****************************************************************************/
+
+ATF_TC(timerfd_cancel_on_set_immed);
+ATF_TC_HEAD(timerfd_cancel_on_set_immed, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "validates cancel-on-set - immediate");
+	atf_tc_set_md_var(tc, "require.user", "root");	/* presumably because the body calls clock_settime() */
+}
+ATF_TC_BODY(timerfd_cancel_on_set_immed, tc)
+{	/* Sets the clock before reading, so the read must fail at once. */
+	struct timespec now;
+	uint64_t val;
+	int fd;
+
+	ATF_REQUIRE((fd = timerfd_create(CLOCK_REALTIME, 0)) >= 0);
+
+	const struct itimerspec its = {
+		.it_value = { .tv_sec = 60 * 60, .tv_nsec = 0 },	/* one hour: not meant to expire */
+		.it_interval = { .tv_sec = 0, .tv_nsec = 0 },
+	};
+
+	ATF_REQUIRE(clock_gettime(CLOCK_REALTIME, &now) == 0);
+	ATF_REQUIRE(timerfd_settime(fd, TFD_TIMER_CANCEL_ON_SET,
+				    &its, NULL) == 0);
+	ATF_REQUIRE(clock_settime(CLOCK_REALTIME, &now) == 0);	/* writes back the time just read */
+	ATF_REQUIRE_ERRNO(ECANCELED, timerfd_read(fd, &val) == -1);
+
+	(void) close(fd);
+}
+
+/*****************************************************************************/
+
+static void *
+timerfd_cancel_on_set_block_helper(void * const v)
+{	/* Helper thread: sets the realtime clock two seconds after the barrier. */
+	struct helper_context * const ctx = v;
+	struct timespec now;
+
+	ATF_REQUIRE(wait_barrier(ctx));	/* rendezvous with the test body */
+
+	ATF_REQUIRE(sleep(2) == 0);	/* gives the test body time to enter its read */
+	ATF_REQUIRE(clock_gettime(CLOCK_REALTIME, &now) == 0);
+	ATF_REQUIRE(clock_settime(CLOCK_REALTIME, &now) == 0);	/* writes back the time just read */
+
+	return NULL;
+}
+
+ATF_TC(timerfd_cancel_on_set_block);
+ATF_TC_HEAD(timerfd_cancel_on_set_block, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "validates cancel-on-set - blocking");
+	atf_tc_set_md_var(tc, "require.user", "root");	/* presumably because the helper calls clock_settime() */
+}
+ATF_TC_BODY(timerfd_cancel_on_set_block, tc)
+{	/* Blocks in read while the helper thread sets the realtime clock. */
+	struct helper_context ctx;
+	pthread_t helper;
+	void *join_val;
+	uint64_t val;
+	int fd;
+
+	ATF_REQUIRE((fd = timerfd_create(CLOCK_REALTIME, 0)) >= 0);
+
+	const struct itimerspec its = {
+		.it_value = { .tv_sec = 60 * 60, .tv_nsec = 0 },	/* one hour: not meant to expire */
+		.it_interval = { .tv_sec = 0, .tv_nsec = 0 },
+	};
+
+	init_helper_context(&ctx);
+
+	ATF_REQUIRE(timerfd_settime(fd, TFD_TIMER_CANCEL_ON_SET,
+				    &its, NULL) == 0);
+	ATF_REQUIRE(pthread_create(&helper, NULL,
+				timerfd_cancel_on_set_block_helper, &ctx) == 0);
+	ATF_REQUIRE(wait_barrier(&ctx));	/* helper sleeps 2s after this, then sets the clock */
+	ATF_REQUIRE_ERRNO(ECANCELED, timerfd_read(fd, &val) == -1);
+
+	ATF_REQUIRE(pthread_join(helper, &join_val) == 0);
+
+	(void) close(fd);
+}
+
+/*****************************************************************************/
+
+ATF_TC(timerfd_select_poll_kevent_immed);
+ATF_TC_HEAD(timerfd_select_poll_kevent_immed, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "validates select/poll/kevent behavior - immediate return");
+}
+ATF_TC_BODY(timerfd_select_poll_kevent_immed, tc)
+{	/* Checks readiness reporting with zero timeouts, before and after expiry. */
+	const struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 };	/* zero kevent() timeout */
+	struct itimerspec its;
+	struct timeval tv;
+	struct stat st;
+	struct pollfd fds[1];
+	uint64_t val;
+	fd_set readfds, writefds, exceptfds;
+	int fd;
+	int kq;
+	struct kevent kev[1];
+
+	ATF_REQUIRE((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK)) >= 0);
+
+	ATF_REQUIRE((kq = kqueue()) >= 0);
+	EV_SET(&kev[0], fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+	ATF_REQUIRE(kevent(kq, kev, 1, NULL, 0, &ts) == 0);	/* register only, fetch no events */
+
+	/*
+	 * fd should be writable but not readable.  Pass all of the
+	 * event bits; we should only get back POLLOUT | POLLWRNORM.
+	 * (It's writable only in so far as we'll get an error if we try.)
+	 */
+	fds[0].fd = fd;
+	fds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI |
+	    POLLOUT | POLLWRNORM | POLLWRBAND | POLLHUP;
+	fds[0].revents = 0;
+	ATF_REQUIRE(poll(fds, 1, 0) == 1);
+	ATF_REQUIRE(fds[0].revents == (POLLOUT | POLLWRNORM));
+
+	/*
+	 * As above; fd should only be set in writefds upon return
+	 * from the select() call.
+	 */
+	FD_ZERO(&readfds);
+	FD_ZERO(&writefds);
+	FD_ZERO(&exceptfds);
+	tv.tv_sec = 0;
+	tv.tv_usec = 0;
+	FD_SET(fd, &readfds);
+	FD_SET(fd, &writefds);
+	FD_SET(fd, &exceptfds);
+	ATF_REQUIRE(select(fd + 1, &readfds, &writefds, &exceptfds, &tv) == 1);
+	ATF_REQUIRE(!FD_ISSET(fd, &readfds));
+	ATF_REQUIRE(FD_ISSET(fd, &writefds));
+	ATF_REQUIRE(!FD_ISSET(fd, &exceptfds));
+
+	/*
+	 * Now set a one-shot half-second timer, wait for it to expire, and
+	 * then check again.
+	 */
+	memset(&its, 0, sizeof(its));
+	its.it_value.tv_sec = 0;
+	its.it_value.tv_nsec = 500000000;
+	ATF_REQUIRE(timerfd_settime(fd, 0, &its, NULL) == 0);
+	ATF_REQUIRE(sleep(2) == 0);	/* well past the 0.5s expiry */
+
+	/* Verify it actually fired via the stat() back-channel. */
+	ATF_REQUIRE(fstat(fd, &st) == 0);
+	ATF_REQUIRE(st.st_size == 1);	/* expected to match the single expiration */
+
+	fds[0].fd = fd;
+	fds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI |
+	    POLLOUT | POLLWRNORM | POLLWRBAND | POLLHUP;
+	fds[0].revents = 0;
+	ATF_REQUIRE(poll(fds, 1, 0) == 1);
+	ATF_REQUIRE(fds[0].revents == (POLLIN | POLLRDNORM |
+				       POLLOUT | POLLWRNORM));
+
+	FD_ZERO(&readfds);
+	FD_ZERO(&writefds);
+	FD_ZERO(&exceptfds);
+	tv.tv_sec = 0;
+	tv.tv_usec = 0;
+	FD_SET(fd, &readfds);
+	FD_SET(fd, &writefds);
+	FD_SET(fd, &exceptfds);
+	ATF_REQUIRE(select(fd + 1, &readfds, &writefds, &exceptfds, &tv) == 2);	/* fd counted in readfds and in writefds */
+	ATF_REQUIRE(FD_ISSET(fd, &readfds));
+	ATF_REQUIRE(FD_ISSET(fd, &writefds));
+	ATF_REQUIRE(!FD_ISSET(fd, &exceptfds));
+
+	/*
+	 * Check that we get an EVFILT_READ event on fd.
+	 */
+	memset(kev, 0, sizeof(kev));
+	ATF_REQUIRE(kevent(kq, NULL, 0, kev, 1, &ts) == 1);
+	ATF_REQUIRE(kev[0].ident == (uintptr_t)fd);
+	ATF_REQUIRE(kev[0].filter == EVFILT_READ);
+	ATF_REQUIRE((kev[0].flags & (EV_EOF | EV_ERROR)) == 0);
+	ATF_REQUIRE(kev[0].data == 1);	/* same count that the read below returns */
+
+	/*
+	 * Read the timerfd to ensure we get the correct number of
+	 * expirations.
+	 */
+	ATF_REQUIRE(timerfd_read(fd, &val) == 0);
+	ATF_REQUIRE(val == 1);
+
+	/* And ensure that we would block if we tried again. */
+	ATF_REQUIRE_ERRNO(EAGAIN, timerfd_read(fd, &val) == -1);
+
+	(void) close(kq);
+	(void) close(fd);
+}
+
+/*****************************************************************************/
+
+ATF_TC(timerfd_select_poll_kevent_block);
+ATF_TC_HEAD(timerfd_select_poll_kevent_block, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "validates select/poll/kevent behavior - blocking");
+}
+ATF_TC_BODY(timerfd_select_poll_kevent_block, tc)
+{	/* Blocks in poll(), select() and kevent() until a 1-second timer fires. */
+	const struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 };	/* zero timeout for registration */
+	struct timespec then, now;
+	struct pollfd fds[1];
+	fd_set readfds;
+	int fd;
+	int kq;
+	struct kevent kev[1];
+
+	ATF_REQUIRE((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK)) >= 0);
+
+	ATF_REQUIRE((kq = kqueue()) >= 0);
+	EV_SET(&kev[0], fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+	ATF_REQUIRE(kevent(kq, kev, 1, NULL, 0, &ts) == 0);	/* register only, fetch no events */
+
+	/*
+	 * For each of these tests, we do the following:
+	 * - Get the current time.
+	 * - Set a 1-second one-shot timer.
+	 * - Block in the multiplexing call.
+	 * - Get the current time and verify that the timer expiration
+	 *   interval has passed.
+	 */
+
+	const struct itimerspec its = {
+		.it_value = { .tv_sec = 1, .tv_nsec = 0 },
+		.it_interval = { .tv_sec = 0, .tv_nsec = 0 },
+	};
+
+	/* poll(2) */
+	fds[0].fd = fd;
+	fds[0].events = POLLIN | POLLRDNORM;
+	fds[0].revents = 0;
+
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &then) == 0);
+	ATF_REQUIRE(timerfd_settime(fd, 0, &its, NULL) == 0);
+	ATF_REQUIRE(poll(fds, 1, INFTIM) == 1);	/* no timeout: returns only once readable */
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+	ATF_REQUIRE(fds[0].revents == (POLLIN | POLLRDNORM));
+	ATF_REQUIRE(now.tv_sec - then.tv_sec >= 1);	/* whole-second granularity only */
+
+	/* select(2) */
+	FD_ZERO(&readfds);
+	FD_SET(fd, &readfds);
+
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &then) == 0);
+	ATF_REQUIRE(timerfd_settime(fd, 0, &its, NULL) == 0);
+	ATF_REQUIRE(select(fd + 1, &readfds, NULL, NULL, NULL) == 1);
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+	ATF_REQUIRE(FD_ISSET(fd, &readfds));
+	ATF_REQUIRE(now.tv_sec - then.tv_sec >= 1);
+
+	/* kevent(2) */
+	memset(kev, 0, sizeof(kev));
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &then) == 0);
+	ATF_REQUIRE(timerfd_settime(fd, 0, &its, NULL) == 0);
+	ATF_REQUIRE(kevent(kq, NULL, 0, kev, 1, NULL) == 1);	/* NULL timeout: wait for the event */
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+	ATF_REQUIRE(kev[0].ident == (uintptr_t)fd);
+	ATF_REQUIRE(kev[0].filter == EVFILT_READ);
+	ATF_REQUIRE((kev[0].flags & (EV_EOF | EV_ERROR)) == 0);
+	ATF_REQUIRE(kev[0].data == 1);
+	ATF_REQUIRE(now.tv_sec - then.tv_sec >= 1);	/* same elapsed-time check as poll/select */
+
+	(void) close(kq);
+	(void) close(fd);
+}
+
+/*****************************************************************************/
+
+static void *
+timerfd_restart_helper(void * const v)
+{	/* Helper thread: closes ctx->fd while the main thread is reading it. */
+	struct helper_context * const ctx = v;
+
+	ATF_REQUIRE(wait_barrier(ctx));	/* rendezvous with the main thread */
+
+	/*
+	 * Wait 5 seconds (that should give the main thread time to
+	 * block), and then close the descriptor.
+	 */
+	ATF_REQUIRE(sleep(5) == 0);	/* the main thread asserts at least 5s elapsed */
+	ATF_REQUIRE(close(ctx->fd) == 0);
+
+	return NULL;
+}
+
+ATF_TC(timerfd_restart);
+ATF_TC_HEAD(timerfd_restart, tc)
+{
+	atf_tc_set_md_var(tc, "descr",
+	    "exercises the 'restart' fileop code path");
+}
+ATF_TC_BODY(timerfd_restart, tc)
+{	/* Blocks in read on a timerfd that the helper thread closes after 5s. */
+	struct timespec then, now, delta;
+	struct helper_context ctx;
+	uint64_t val;
+	pthread_t helper;
+	void *join_val;
+
+	init_helper_context(&ctx);
+
+	ATF_REQUIRE((ctx.fd = timerfd_create(CLOCK_MONOTONIC, 0)) >= 0);	/* flags 0: TFD_NONBLOCK not set */
+
+	const struct itimerspec its = {
+		.it_value = { .tv_sec = 60 * 60, .tv_nsec = 0 },	/* one hour: not meant to expire */
+		.it_interval = { .tv_sec = 0, .tv_nsec = 0 },
+	};
+	ATF_REQUIRE(timerfd_settime(ctx.fd, 0, &its, NULL) == 0);
+
+
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &then) == 0);	/* taken before the helper starts */
+	ATF_REQUIRE(pthread_create(&helper, NULL,
+				   timerfd_restart_helper, &ctx) == 0);
+
+	/*
+	 * Wait for the helper to be ready, and then immediately block
+	 * in read().  The helper will close the file, and we should get
+	 * EBADF after a few seconds.
+	 */
+	ATF_REQUIRE(wait_barrier(&ctx));
+	ATF_REQUIRE_ERRNO(EBADF, timerfd_read(ctx.fd, &val) == -1);
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+
+	timespecsub(&now, &then, &delta);
+	ATF_REQUIRE(delta.tv_sec >= 5);	/* the helper sleeps 5s before closing */
+
+	/* Reap the helper. */
+	ATF_REQUIRE(pthread_join(helper, &join_val) == 0);
+}
+
+/*****************************************************************************/
+
+ATF_TP_ADD_TCS(tp)
+{	/* Registers the timerfd test cases with the test program tp. */
+	ATF_TP_ADD_TC(tp, timerfd_create);
+	ATF_TP_ADD_TC(tp, timerfd_bogusfd);
+	ATF_TP_ADD_TC(tp, timerfd_block);
+	ATF_TP_ADD_TC(tp, timerfd_repeating);
+	ATF_TP_ADD_TC(tp, timerfd_abstime);
+	ATF_TP_ADD_TC(tp, timerfd_cancel_on_set_block);	/* needs root, see its ATF_TC_HEAD */
+	ATF_TP_ADD_TC(tp, timerfd_cancel_on_set_immed);	/* needs root, see its ATF_TC_HEAD */
+	ATF_TP_ADD_TC(tp, timerfd_select_poll_kevent_immed);
+	ATF_TP_ADD_TC(tp, timerfd_select_poll_kevent_block);
+	ATF_TP_ADD_TC(tp, timerfd_restart);
+
+	return atf_no_error();
+}

Reply via email to