Module Name: src
Committed By: thorpej
Date: Sun Sep 19 15:51:28 UTC 2021
Modified Files:
src/distrib/sets/lists/base: shl.mi
src/distrib/sets/lists/comp: mi
src/distrib/sets/lists/debug: mi shl.mi
src/distrib/sets/lists/tests: mi
src/lib/libc: shlib_version
src/lib/libc/sys: Makefile.inc
src/sys/kern: files.kern syscalls.master
src/sys/sys: Makefile file.h
src/tests/lib/libc/sys: Makefile
Added Files:
src/lib/libc/sys: eventfd.2 eventfd_read.c eventfd_write.c timerfd.2
src/sys/kern: sys_eventfd.c sys_timerfd.c
src/sys/sys: eventfd.h timerfd.h
src/tests/lib/libc/sys: t_eventfd.c t_timerfd.c
Log Message:
Add native implementations of eventfd(2) and timerfd(2), compatible with
the Linux interfaces of the same name.
To generate a diff of this commit:
cvs rdiff -u -r1.925 -r1.926 src/distrib/sets/lists/base/shl.mi
cvs rdiff -u -r1.2392 -r1.2393 src/distrib/sets/lists/comp/mi
cvs rdiff -u -r1.360 -r1.361 src/distrib/sets/lists/debug/mi
cvs rdiff -u -r1.282 -r1.283 src/distrib/sets/lists/debug/shl.mi
cvs rdiff -u -r1.1125 -r1.1126 src/distrib/sets/lists/tests/mi
cvs rdiff -u -r1.291 -r1.292 src/lib/libc/shlib_version
cvs rdiff -u -r1.245 -r1.246 src/lib/libc/sys/Makefile.inc
cvs rdiff -u -r0 -r1.1 src/lib/libc/sys/eventfd.2 \
src/lib/libc/sys/eventfd_read.c src/lib/libc/sys/eventfd_write.c \
src/lib/libc/sys/timerfd.2
cvs rdiff -u -r1.56 -r1.57 src/sys/kern/files.kern
cvs rdiff -u -r0 -r1.2 src/sys/kern/sys_eventfd.c src/sys/kern/sys_timerfd.c
cvs rdiff -u -r1.307 -r1.308 src/sys/kern/syscalls.master
cvs rdiff -u -r1.177 -r1.178 src/sys/sys/Makefile
cvs rdiff -u -r0 -r1.2 src/sys/sys/eventfd.h src/sys/sys/timerfd.h
cvs rdiff -u -r1.87 -r1.88 src/sys/sys/file.h
cvs rdiff -u -r1.68 -r1.69 src/tests/lib/libc/sys/Makefile
cvs rdiff -u -r0 -r1.2 src/tests/lib/libc/sys/t_eventfd.c \
src/tests/lib/libc/sys/t_timerfd.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/distrib/sets/lists/base/shl.mi
diff -u src/distrib/sets/lists/base/shl.mi:1.925 src/distrib/sets/lists/base/shl.mi:1.926
--- src/distrib/sets/lists/base/shl.mi:1.925 Thu Sep 2 11:28:45 2021
+++ src/distrib/sets/lists/base/shl.mi Sun Sep 19 15:51:27 2021
@@ -1,4 +1,4 @@
-# $NetBSD: shl.mi,v 1.925 2021/09/02 11:28:45 christos Exp $
+# $NetBSD: shl.mi,v 1.926 2021/09/19 15:51:27 thorpej Exp $
#
# Note: Don't delete entries from here - mark them as "obsolete" instead,
# unless otherwise stated below.
@@ -24,7 +24,7 @@
./lib/libblocklist.so.0.0 base-sys-shlib dynamicroot
./lib/libc.so base-sys-shlib dynamicroot
./lib/libc.so.12 base-sys-shlib dynamicroot
-./lib/libc.so.12.218 base-sys-shlib dynamicroot
+./lib/libc.so.12.219 base-sys-shlib dynamicroot
./lib/libcrypt.so base-sys-shlib dynamicroot
./lib/libcrypt.so.1 base-sys-shlib dynamicroot
./lib/libcrypt.so.1.0 base-sys-shlib dynamicroot
@@ -251,7 +251,7 @@
./usr/lib/libc++.so.1.0 base-sys-shlib compatfile,libcxx
./usr/lib/libc.so base-sys-shlib compatfile
./usr/lib/libc.so.12 base-sys-shlib compatfile
-./usr/lib/libc.so.12.218 base-sys-shlib compatfile
+./usr/lib/libc.so.12.219 base-sys-shlib compatfile
./usr/lib/libcbor.so base-sys-shlib compatfile
./usr/lib/libcbor.so.0 base-sys-shlib compatfile
./usr/lib/libcbor.so.0.5 base-sys-shlib compatfile
Index: src/distrib/sets/lists/comp/mi
diff -u src/distrib/sets/lists/comp/mi:1.2392 src/distrib/sets/lists/comp/mi:1.2393
--- src/distrib/sets/lists/comp/mi:1.2392 Thu Sep 16 23:32:49 2021
+++ src/distrib/sets/lists/comp/mi Sun Sep 19 15:51:27 2021
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.2392 2021/09/16 23:32:49 christos Exp $
+# $NetBSD: mi,v 1.2393 2021/09/19 15:51:27 thorpej Exp $
#
# Note: don't delete entries from here - mark them as "obsolete" instead.
./etc/mtree/set.comp comp-sys-root
@@ -3279,6 +3279,7 @@
./usr/include/sys/errno.h comp-c-include
./usr/include/sys/evcnt.h comp-c-include
./usr/include/sys/event.h comp-c-include
+./usr/include/sys/eventfd.h comp-c-include
./usr/include/sys/exec.h comp-c-include
./usr/include/sys/exec_aout.h comp-c-include
./usr/include/sys/exec_coff.h comp-c-include
@@ -3430,6 +3431,7 @@
./usr/include/sys/time.h comp-c-include
./usr/include/sys/timeb.h comp-c-include
./usr/include/sys/timepps.h comp-c-include
+./usr/include/sys/timerfd.h comp-c-include
./usr/include/sys/times.h comp-c-include
./usr/include/sys/timespec.h comp-c-include
./usr/include/sys/timex.h comp-c-include
@@ -4659,6 +4661,9 @@
./usr/share/man/cat2/dup2.0 comp-c-catman .cat
./usr/share/man/cat2/dup3.0 comp-c-catman .cat
./usr/share/man/cat2/errno.0 comp-c-catman .cat
+./usr/share/man/cat2/eventfd.0 comp-c-catman .cat
+./usr/share/man/cat2/eventfd_read.0 comp-c-catman .cat
+./usr/share/man/cat2/eventfd_write.0 comp-c-catman .cat
./usr/share/man/cat2/execve.0 comp-c-catman .cat
./usr/share/man/cat2/extattr_delete_fd.0 comp-c-catman .cat
./usr/share/man/cat2/extattr_delete_file.0 comp-c-catman .cat
@@ -4897,6 +4902,10 @@
./usr/share/man/cat2/timer_getoverrun.0 comp-c-catman .cat
./usr/share/man/cat2/timer_gettime.0 comp-c-catman .cat
./usr/share/man/cat2/timer_settime.0 comp-c-catman .cat
+./usr/share/man/cat2/timerfd.0 comp-c-catman .cat
+./usr/share/man/cat2/timerfd_create.0 comp-c-catman .cat
+./usr/share/man/cat2/timerfd_gettime.0 comp-c-catman .cat
+./usr/share/man/cat2/timerfd_settime.0 comp-c-catman .cat
./usr/share/man/cat2/truncate.0 comp-c-catman .cat
./usr/share/man/cat2/ucontext.0 comp-c-catman .cat
./usr/share/man/cat2/umask.0 comp-c-catman .cat
@@ -12994,6 +13003,9 @@
./usr/share/man/html2/dup2.html comp-c-htmlman html
./usr/share/man/html2/dup3.html comp-c-htmlman html
./usr/share/man/html2/errno.html comp-c-htmlman html
+./usr/share/man/html2/eventfd.html comp-c-htmlman html
+./usr/share/man/html2/eventfd_read.html comp-c-htmlman html
+./usr/share/man/html2/eventfd_write.html comp-c-htmlman html
./usr/share/man/html2/execve.html comp-c-htmlman html
./usr/share/man/html2/extattr_delete_fd.html comp-c-htmlman html
./usr/share/man/html2/extattr_delete_file.html comp-c-htmlman html
@@ -13218,6 +13230,10 @@
./usr/share/man/html2/timer_getoverrun.html comp-c-htmlman html
./usr/share/man/html2/timer_gettime.html comp-c-htmlman html
./usr/share/man/html2/timer_settime.html comp-c-htmlman html
+./usr/share/man/html2/timerfd.html comp-c-htmlman html
+./usr/share/man/html2/timerfd_create.html comp-c-htmlman html
+./usr/share/man/html2/timerfd_gettime.html comp-c-htmlman html
+./usr/share/man/html2/timerfd_settime.html comp-c-htmlman html
./usr/share/man/html2/truncate.html comp-c-htmlman html
./usr/share/man/html2/ucontext.html comp-c-htmlman html
./usr/share/man/html2/umask.html comp-c-htmlman html
@@ -21146,6 +21162,9 @@
./usr/share/man/man2/dup2.2 comp-c-man .man
./usr/share/man/man2/dup3.2 comp-c-man .man
./usr/share/man/man2/errno.2 comp-c-man .man
+./usr/share/man/man2/eventfd.2 comp-c-man .man
+./usr/share/man/man2/eventfd_read.2 comp-c-man .man
+./usr/share/man/man2/eventfd_write.2 comp-c-man .man
./usr/share/man/man2/execve.2 comp-c-man .man
./usr/share/man/man2/extattr_delete_fd.2 comp-c-man .man
./usr/share/man/man2/extattr_delete_file.2 comp-c-man .man
@@ -21382,6 +21401,10 @@
./usr/share/man/man2/timer_getoverrun.2 comp-c-man .man
./usr/share/man/man2/timer_gettime.2 comp-c-man .man
./usr/share/man/man2/timer_settime.2 comp-c-man .man
+./usr/share/man/man2/timerfd.2 comp-c-man .man
+./usr/share/man/man2/timerfd_create.2 comp-c-man .man
+./usr/share/man/man2/timerfd_gettime.2 comp-c-man .man
+./usr/share/man/man2/timerfd_settime.2 comp-c-man .man
./usr/share/man/man2/truncate.2 comp-c-man .man
./usr/share/man/man2/ucontext.2 comp-c-man .man
./usr/share/man/man2/umask.2 comp-c-man .man
Index: src/distrib/sets/lists/debug/mi
diff -u src/distrib/sets/lists/debug/mi:1.360 src/distrib/sets/lists/debug/mi:1.361
--- src/distrib/sets/lists/debug/mi:1.360 Sun Aug 29 09:54:18 2021
+++ src/distrib/sets/lists/debug/mi Sun Sep 19 15:51:28 2021
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.360 2021/08/29 09:54:18 christos Exp $
+# $NetBSD: mi,v 1.361 2021/09/19 15:51:28 thorpej Exp $
./etc/mtree/set.debug comp-sys-root
./usr/lib comp-sys-usr compatdir
./usr/lib/i18n/libBIG5_g.a comp-c-debuglib debuglib,compatfile
@@ -2157,6 +2157,7 @@
./usr/libdata/debug/usr/tests/lib/libc/sys/t_connect.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/sys/t_context.debug tests-obsolete obsolete,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/sys/t_dup.debug tests-lib-debug debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/sys/t_eventfd.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/sys/t_fork.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/sys/t_fsync.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/sys/t_futex_ops.debug tests-lib-tests debug,atf,compattestfile
@@ -2224,6 +2225,7 @@
./usr/libdata/debug/usr/tests/lib/libc/sys/t_swapcontext.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/sys/t_syscall.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/sys/t_timer_create.debug tests-lib-debug debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/sys/t_timerfd.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/sys/t_truncate.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/sys/t_ucontext.debug tests-lib-debug debug,atf,compattestfile
./usr/libdata/debug/usr/tests/lib/libc/sys/t_umask.debug tests-lib-debug debug,atf,compattestfile
Index: src/distrib/sets/lists/debug/shl.mi
diff -u src/distrib/sets/lists/debug/shl.mi:1.282 src/distrib/sets/lists/debug/shl.mi:1.283
--- src/distrib/sets/lists/debug/shl.mi:1.282 Thu Sep 2 11:28:45 2021
+++ src/distrib/sets/lists/debug/shl.mi Sun Sep 19 15:51:28 2021
@@ -1,10 +1,10 @@
-# $NetBSD: shl.mi,v 1.282 2021/09/02 11:28:45 christos Exp $
+# $NetBSD: shl.mi,v 1.283 2021/09/19 15:51:28 thorpej Exp $
./usr/lib/libbfd_g.a comp-c-debuglib debuglib,compatfile,binutils
./usr/libdata/debug/lib base-sys-usr debug,dynamicroot,compatdir
./usr/libdata/debug/lib/libavl.so.0.0.debug comp-zfs-debug debug,dynamicroot,zfs
./usr/libdata/debug/lib/libblacklist.so.0.0.debug comp-obsolete obsolete,compatfile
./usr/libdata/debug/lib/libblocklist.so.0.0.debug comp-sys-debug debug,dynamicroot
-./usr/libdata/debug/lib/libc.so.12.218.debug comp-sys-debug debug,dynamicroot
+./usr/libdata/debug/lib/libc.so.12.219.debug comp-sys-debug debug,dynamicroot
./usr/libdata/debug/lib/libcrypt.so.1.0.debug comp-sys-debug debug,dynamicroot
./usr/libdata/debug/lib/libcrypto.so.12.0.debug comp-sys-debug debug,dynamicroot,openssl=10
./usr/libdata/debug/lib/libcrypto.so.14.0.debug comp-sys-debug debug,dynamicroot,openssl=11
@@ -84,7 +84,7 @@
./usr/libdata/debug/usr/lib/libbsdmalloc.so.0.0.debug comp-sys-debug debug,compatfile
./usr/libdata/debug/usr/lib/libbz2.so.1.1.debug comp-sys-debug debug,compatfile
./usr/libdata/debug/usr/lib/libc++.so.1.0.debug comp-sys-debug debug,compatfile,libcxx
-./usr/libdata/debug/usr/lib/libc.so.12.218.debug comp-sys-debug debug,compatfile
+./usr/libdata/debug/usr/lib/libc.so.12.219.debug comp-sys-debug debug,compatfile
./usr/libdata/debug/usr/lib/libcbor.so.0.5.debug comp-sys-debug debug,compatfile
./usr/libdata/debug/usr/lib/libcom_err.so.8.0.debug comp-krb5-debug debug,compatfile,kerberos
./usr/libdata/debug/usr/lib/libcrypt.so.1.0.debug comp-sys-debug debug,compatfile
Index: src/distrib/sets/lists/tests/mi
diff -u src/distrib/sets/lists/tests/mi:1.1125 src/distrib/sets/lists/tests/mi:1.1126
--- src/distrib/sets/lists/tests/mi:1.1125 Mon Sep 13 22:09:06 2021
+++ src/distrib/sets/lists/tests/mi Sun Sep 19 15:51:28 2021
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.1125 2021/09/13 22:09:06 rillig Exp $
+# $NetBSD: mi,v 1.1126 2021/09/19 15:51:28 thorpej Exp $
#
# Note: don't delete entries from here - mark them as "obsolete" instead.
#
@@ -3145,6 +3145,7 @@
./usr/tests/lib/libc/sys/t_connect tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/sys/t_context tests-obsolete obsolete
./usr/tests/lib/libc/sys/t_dup tests-lib-tests compattestfile,atf
+./usr/tests/lib/libc/sys/t_eventfd tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/sys/t_fork tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/sys/t_fsync tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/sys/t_futex_ops tests-lib-tests compattestfile,atf
@@ -3212,6 +3213,7 @@
./usr/tests/lib/libc/sys/t_swapcontext tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/sys/t_syscall tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/sys/t_timer_create tests-lib-tests compattestfile,atf
+./usr/tests/lib/libc/sys/t_timerfd tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/sys/t_truncate tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/sys/t_ucontext tests-lib-tests compattestfile,atf
./usr/tests/lib/libc/sys/t_umask tests-lib-tests compattestfile,atf
Index: src/lib/libc/shlib_version
diff -u src/lib/libc/shlib_version:1.291 src/lib/libc/shlib_version:1.292
--- src/lib/libc/shlib_version:1.291 Fri Oct 9 18:38:48 2020
+++ src/lib/libc/shlib_version Sun Sep 19 15:51:28 2021
@@ -1,4 +1,4 @@
-# $NetBSD: shlib_version,v 1.291 2020/10/09 18:38:48 christos Exp $
+# $NetBSD: shlib_version,v 1.292 2021/09/19 15:51:28 thorpej Exp $
# Remember to update distrib/sets/lists/base/shl.* when changing
#
# things we wish to do on next major version bump:
@@ -54,4 +54,4 @@
# - the syscall stubs for the (obsolete) lfs syscalls should be removed
# - remove tzsetwall(3), upstream has removed it
major=12
-minor=218
+minor=219
Index: src/lib/libc/sys/Makefile.inc
diff -u src/lib/libc/sys/Makefile.inc:1.245 src/lib/libc/sys/Makefile.inc:1.246
--- src/lib/libc/sys/Makefile.inc:1.245 Fri Aug 14 00:53:16 2020
+++ src/lib/libc/sys/Makefile.inc Sun Sep 19 15:51:28 2021
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile.inc,v 1.245 2020/08/14 00:53:16 riastradh Exp $
+# $NetBSD: Makefile.inc,v 1.246 2021/09/19 15:51:28 thorpej Exp $
# @(#)Makefile.inc 8.3 (Berkeley) 10/24/94
# sys sources
@@ -7,9 +7,10 @@
# other sources shared with the kernel, used in syscalls
SRCS+= cpuset.c
# glue to offer userland wrappers for some syscalls
-SRCS+= accept4.c clock_getcpuclockid.c posix_fadvise.c posix_madvise.c \
- ppoll.c sched.c sigqueue.c sigtimedwait.c sigwait.c sigwaitinfo.c \
- statvfs.c swapon.c semctl.c vadvise.c
+SRCS+= accept4.c clock_getcpuclockid.c eventfd_read.c eventfd_write.c \
+ posix_fadvise.c posix_madvise.c ppoll.c sched.c sigqueue.c \
+ sigtimedwait.c sigwait.c sigwaitinfo.c statvfs.c swapon.c semctl.c \
+ vadvise.c
.if ${RUMPRUN} != "yes"
# modules with non-default implementations on at least one architecture:
@@ -100,6 +101,7 @@ ASM=\
clock_getcpuclockid2.S \
__clock_getres50.S __clock_gettime50.S \
dup.S dup2.S dup3.S \
+ eventfd.S \
extattrctl.S \
extattr_delete_fd.S extattr_delete_file.S \
extattr_delete_link.S extattr_get_fd.S extattr_get_file.S \
@@ -154,6 +156,7 @@ ASM=\
__statvfs190.S swapctl.S symlink.S symlinkat.S __sysctl.S \
timer_create.S timer_delete.S __timer_gettime50.S timer_getoverrun.S \
____semctl50.S __timer_settime50.S \
+ timerfd_create.S timerfd_gettime.S timerfd_settime.S \
umask.S undelete.S unlink.S unlinkat.S unmount.S __utimes50.S \
utimensat.S utrace.S uuidgen.S
@@ -251,7 +254,7 @@ LintSysPseudoNoerr.c: ${LIBCDIR}/sys/mak
MAN+= accept.2 access.2 acct.2 adjtime.2 bind.2 brk.2 chdir.2 \
chflags.2 chmod.2 chown.2 chroot.2 clock_getcpuclockid2.2 \
clock_settime.2 clone.2 close.2 \
- connect.2 dup.2 execve.2 _exit.2 extattr_get_file.2 \
+ connect.2 dup.2 eventfd.2 execve.2 _exit.2 extattr_get_file.2 \
fcntl.2 fdatasync.2 fdiscard.2 fhopen.2 \
flock.2 fork.2 fsync.2 getcontext.2 getdents.2 \
getfh.2 getvfsstat.2 getgid.2 getgroups.2 \
@@ -284,7 +287,7 @@ MAN+= accept.2 access.2 acct.2 adjtime.2
socket.2 \
socketpair.2 stat.2 statvfs.2 swapctl.2 swapon.3 symlink.2 \
sync.2 sysarch.2 syscall.2 timer_create.2 timer_delete.2 \
- timer_settime.2 truncate.2 umask.2 undelete.2 \
+ timer_settime.2 timerfd.2 truncate.2 umask.2 undelete.2 \
unlink.2 utimes.2 utrace.2 uuidgen.2 vfork.2 wait.2 write.2
MLINKS+=_exit.2 _Exit.2
@@ -299,6 +302,8 @@ MLINKS+=chown.2 fchown.2 chown.2 lchown.
MLINKS+=chroot.2 fchroot.2
MLINKS+=clock_settime.2 clock_gettime.2
MLINKS+=clock_settime.2 clock_getres.2
+MLINKS+=eventfd.2 eventfd_read.2 \
+ eventfd.2 eventfd_write.2
MLINKS+=extattr_get_file.2 extattr_set_file.2 \
extattr_get_file.2 extattr_delete_file.2 \
extattr_get_file.2 extattr_list_file.2 \
@@ -380,6 +385,9 @@ MLINKS+=statvfs.2 fstatvfs.2
MLINKS+=statvfs.2 statvfs1.2
MLINKS+=statvfs.2 fstatvfs1.2
MLINKS+=syscall.2 __syscall.2
+MLINKS+=timerfd.2 timerfd_create.2 \
+ timerfd.2 timerfd_gettime.2 \
+ timerfd.2 timerfd_settime.2
MLINKS+=truncate.2 ftruncate.2
MLINKS+=unlink.2 unlinkat.2
MLINKS+=utimes.2 futimes.2 utimes.2 lutimes.2
Index: src/sys/kern/files.kern
diff -u src/sys/kern/files.kern:1.56 src/sys/kern/files.kern:1.57
--- src/sys/kern/files.kern:1.56 Tue May 18 05:16:09 2021
+++ src/sys/kern/files.kern Sun Sep 19 15:51:27 2021
@@ -1,4 +1,4 @@
-# $NetBSD: files.kern,v 1.56 2021/05/18 05:16:09 mrg Exp $
+# $NetBSD: files.kern,v 1.57 2021/09/19 15:51:27 thorpej Exp $
#
# kernel sources
@@ -158,6 +158,7 @@ file kern/subr_workqueue.c kern
file kern/subr_xcall.c kern
file kern/sys_aio.c aio
file kern/sys_descrip.c kern
+file kern/sys_eventfd.c kern
file kern/sys_futex.c kern
file kern/sys_generic.c kern
file kern/sys_getrandom.c kern
@@ -174,6 +175,7 @@ file kern/sys_select.c kern
file kern/sys_sig.c kern
file kern/sys_sched.c kern
file kern/sys_socket.c kern
+file kern/sys_timerfd.c kern
file kern/syscalls.c syscall_debug | kdtrace_hooks
file kern/sysv_ipc.c sysvshm | sysvsem | sysvmsg
file kern/sysv_msg.c sysvmsg
Index: src/sys/kern/syscalls.master
diff -u src/sys/kern/syscalls.master:1.307 src/sys/kern/syscalls.master:1.308
--- src/sys/kern/syscalls.master:1.307 Mon Nov 2 18:55:12 2020
+++ src/sys/kern/syscalls.master Sun Sep 19 15:51:27 2021
@@ -1,4 +1,4 @@
- $NetBSD: syscalls.master,v 1.307 2020/11/02 18:55:12 christos Exp $
+ $NetBSD: syscalls.master,v 1.308 2021/09/19 15:51:27 thorpej Exp $
; @(#)syscalls.master 8.2 (Berkeley) 1/13/94
@@ -398,9 +398,13 @@
#else
176 EXCL ntp_adjtime
#endif
-177 UNIMPL
-178 UNIMPL
-179 UNIMPL
+177 STD { int|sys||timerfd_create(clockid_t clock_id, \
+ int flags); }
+178 STD { int|sys||timerfd_settime(int fd, int flags, \
+ const struct itimerspec *new_value, \
+ struct itimerspec *old_value); }
+179 STD { int|sys||timerfd_gettime(int fd, \
+ struct itimerspec *curr_value); }
180 UNIMPL
; Syscalls 180-199 are used by/reserved for BSD
@@ -565,7 +569,7 @@
{ ssize_t|sys||mq_timedreceive(mqd_t mqdes, \
char *msg_ptr, size_t msg_len, unsigned *msg_prio, \
const struct timespec50 *abs_timeout); }
-267 UNIMPL
+267 STD { int|sys||eventfd(unsigned int val, int flags); }
268 UNIMPL
269 UNIMPL
270 STD RUMP { int|sys||__posix_rename(const char *from, \
Index: src/sys/sys/Makefile
diff -u src/sys/sys/Makefile:1.177 src/sys/sys/Makefile:1.178
--- src/sys/sys/Makefile:1.177 Wed Sep 15 17:33:08 2021
+++ src/sys/sys/Makefile Sun Sep 19 15:51:27 2021
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.177 2021/09/15 17:33:08 thorpej Exp $
+# $NetBSD: Makefile,v 1.178 2021/09/19 15:51:27 thorpej Exp $
.include <bsd.own.mk>
@@ -18,7 +18,7 @@ INCS= acct.h acl.h agpio.h aio.h ansi.h
dir.h dirent.h \
disk.h disklabel.h disklabel_acorn.h disklabel_gpt.h disklabel_rdb.h \
dkbad.h dkio.h dkstat.h domain.h drvctlio.h dvdio.h \
- endian.h envsys.h errno.h evcnt.h event.h exec.h exec_aout.h \
+ endian.h envsys.h errno.h evcnt.h event.h eventfd.h exec.h exec_aout.h \
exec_coff.h exec_ecoff.h exec_elf.h exec_script.h extattr.h extent.h \
fault.h \
fcntl.h fd_set.h fdio.h featuretest.h file.h filedesc.h filio.h \
@@ -42,7 +42,7 @@ INCS= acct.h acl.h agpio.h aio.h ansi.h
socketvar.h sockio.h spawn.h specificdata.h stat.h \
statvfs.h syscall.h syscallargs.h sysctl.h stdarg.h stdbool.h \
stdint.h swap.h syncobj.h syslimits.h syslog.h \
- tape.h termios.h time.h timeb.h timepps.h times.h timespec.h \
+ tape.h termios.h time.h timeb.h timepps.h timerfd.h times.h timespec.h \
timex.h tls.h trace.h tree.h tty.h ttychars.h ttycom.h \
ttydefaults.h ttydev.h types.h \
ucontext.h ucred.h uio.h un.h unistd.h unpcb.h utsname.h uuid.h \
Index: src/sys/sys/file.h
diff -u src/sys/sys/file.h:1.87 src/sys/sys/file.h:1.88
--- src/sys/sys/file.h:1.87 Sat Sep 11 10:08:55 2021
+++ src/sys/sys/file.h Sun Sep 19 15:51:27 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: file.h,v 1.87 2021/09/11 10:08:55 riastradh Exp $ */
+/* $NetBSD: file.h,v 1.88 2021/09/19 15:51:27 thorpej Exp $ */
/*-
* Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -102,6 +102,8 @@ union file_data {
struct socket *fd_so; // DTYPE_SOCKET
struct pipe *fd_pipe; // DTYPE_PIPE
struct kqueue *fd_kq; // DTYPE_KQUEUE
+ struct eventfd *fd_eventfd; // DTYPE_EVENTFD
+ struct timerfd *fd_timerfd; // DTYPE_TIMERFD
void *fd_data; // DTYPE_MISC
struct audio_file *fd_audioctx; // DTYPE_MISC (audio)
struct pad_softc *fd_pad; // DTYPE_MISC (pad)
@@ -148,6 +150,8 @@ struct file {
#define f_data f_undata.fd_data
#define f_mqueue f_undata.fd_mq
#define f_ksem f_undata.fd_ks
+#define f_eventfd f_undata.fd_eventfd
+#define f_timerfd f_undata.fd_timerfd
#define f_rndctx f_undata.fd_rndctx
#define f_audioctx f_undata.fd_audioctx
@@ -170,10 +174,12 @@ struct file {
#define DTYPE_CRYPTO 6 /* crypto */
#define DTYPE_MQUEUE 7 /* message queue */
#define DTYPE_SEM 8 /* semaphore */
+#define DTYPE_EVENTFD 9 /* eventfd */
+#define DTYPE_TIMERFD 10 /* timerfd */
#define DTYPE_NAMES \
"0", "file", "socket", "pipe", "kqueue", "misc", "crypto", "mqueue", \
- "semaphore"
+ "semaphore", "eventfd", "timerfd"
#ifdef _KERNEL
Index: src/tests/lib/libc/sys/Makefile
diff -u src/tests/lib/libc/sys/Makefile:1.68 src/tests/lib/libc/sys/Makefile:1.69
--- src/tests/lib/libc/sys/Makefile:1.68 Sun Sep 6 07:20:31 2020
+++ src/tests/lib/libc/sys/Makefile Sun Sep 19 15:51:28 2021
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.68 2020/09/06 07:20:31 mrg Exp $
+# $NetBSD: Makefile,v 1.69 2021/09/19 15:51:28 thorpej Exp $
MKMAN= no
@@ -16,6 +16,7 @@ TESTS_C+= t_clock_nanosleep
TESTS_C+= t_clone
TESTS_C+= t_connect
TESTS_C+= t_dup
+TESTS_C+= t_eventfd
TESTS_C+= t_fork
TESTS_C+= t_fsync
TESTS_C+= t_futex_ops
@@ -81,6 +82,7 @@ TESTS_C+= t_swapcontext
TESTS_C+= t_stat
TESTS_C+= t_syscall
TESTS_C+= t_timer_create
+TESTS_C+= t_timerfd
TESTS_C+= t_truncate
TESTS_C+= t_ucontext
TESTS_C+= t_umask
@@ -93,7 +95,9 @@ TESTS_C+= t_write
SRCS.t_mprotect= t_mprotect.c ${SRCS_EXEC_PROT} t_mprotect_helper.c
-LDADD.t_getpid+= -lpthread
+LDADD.t_eventfd+= -lpthread
+LDADD.t_getpid+= -lpthread
+LDADD.t_timerfd+= -lpthread
LDADD.t_ptrace_sigchld+= -pthread -lm
Added files:
Index: src/lib/libc/sys/eventfd.2
diff -u /dev/null src/lib/libc/sys/eventfd.2:1.1
--- /dev/null Sun Sep 19 15:51:29 2021
+++ src/lib/libc/sys/eventfd.2 Sun Sep 19 15:51:28 2021
@@ -0,0 +1,271 @@
+.\" $NetBSD: eventfd.2,v 1.1 2021/09/19 15:51:28 thorpej Exp $
+.\"
+.\" Copyright (c) 2021 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to The NetBSD Foundation
+.\" by Jason R. Thorpe.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd September 17, 2021
+.Dt EVENTFD 2
+.Os
+.Sh NAME
+.Nm eventfd ,
+.Nm eventfd_read ,
+.Nm eventfd_write
+.Nd create and interact with a counting event descriptor
+.Sh SYNOPSIS
+.In sys/eventfd.h
+.Ft int
+.Fn eventfd "unsigned int val" "int flags"
+.Ft int
+.Fn eventfd_read "int efd" "eventfd_t *valp"
+.Ft int
+.Fn eventfd_write "int efd" "eventfd_t val"
+.Sh DESCRIPTION
+The
+.Nm
+interface presents a simple counting object associated with a file descriptor.
+Writes to and reads from this file descriptor increment and decrement the count,
+respectively.
+When the object's value is non-zero, the file descriptor is considered
+.Dq readable ,
+and when the count is less than the maximum value
+.Po
+.Dv UINT64_MAX
+- 1
+.Pc
+it is considered
+.Dq writable .
+When an
+.Nm
+object is no longer needed, it may be disposed of using
+.Xr close 2 .
+.Pp
+All I/O to an
+.Nm
+object is 8 bytes in length, which is the space required to store an
+unsigned 64-bit integer.
+Any read or write with a buffer smaller than 8 bytes will fail with
+.Er EINVAL .
+Only the first 8 bytes of the buffer will be used.
+.Pp
+The
+.Fn eventfd
+function creates a new counting event object and returns a file descriptor
+representing that object.
+The initial value of the object is specified by the
+.Fa val
+argument.
+The following flags define the behavior of the resulting object:
+.Bl -tag -width "EFD_SEMAPHORE"
+.It Dv EFD_CLOEXEC
+This is an alias for the
+.Dv O_CLOEXEC
+flag; see
+.Xr open 2
+for more information.
+.It Dv EFD_NONBLOCK
+This is an alias for the
+.Dv O_NONBLOCK
+flag; see
+.Xr open 2
+for more information.
+.It Dv EFD_SEMAPHORE
+Creates a
+.Dq semaphore mode
+object; see below for details.
+.El
+.Pp
+Reads from an
+.Nm
+object return an unsigned 64-bit integer in the caller's buffer.
+The semantics of this value are dependent on whether the
+.Nm
+object was created in
+.Dq semaphore mode :
+.Bl -bullet
+.It
+If the
+.Nm
+object was created in
+.Dq semaphore mode ,
+reads return the value
+.Dv 1
+and the object's counter is decremented by
+.Dv 1 .
+.It
+If the
+.Nm
+object was not created in
+.Dq semaphore mode ,
+reads return the current value of the object's counter and
+reset the counter to
+.Dv 0 .
+.El
+.Pp
+If the value of the
+.Nm
+object's counter is
+.Dv 0 ,
+then reads will block, unless the
+.Nm
+object is set for non-blocking I/O.
+.Pp
+Writing to an
+.Nm
+object adds the unsigned 64-bit value provided in the caller's buffer
+to the
+.Nm
+object's counter.
+If adding the specified value would exceed the maximum value, then the
+write will block, unless the
+.Nm
+object is set for non-blocking I/O.
+.Pp
+The convenience functions
+.Fn eventfd_read
+and
+.Fn eventfd_write
+are provided to simplify interacting with
+.Nm
+objects, and are simply wrappers around the
+.Xr read 2
+and
+.Xr write 2
+system calls:
+.Bl -tag -width "eventfd_writeXX"
+.It Fn eventfd_read
+Reads the unsigned 64-bit integer value of the
+.Nm
+object and returns it in
+.Fa valp .
+.It Fn eventfd_write
+Writes the unsigned 64-bit integer value
+.Fa val
+to the
+.Nm
+object.
+.El
+.Sh RETURN VALUES
+The
+.Fn eventfd
+system call returns
+.Dv -1
+if an error occurs, otherwise the return value is a descriptor representing the
+.Nm
+object.
+.Pp
+The
+.Fn eventfd_read
+and
+.Fn eventfd_write
+functions return
+.Dv 0
+upon success or
+.Dv -1
+if an error occurs.
+.Sh ERRORS
+The
+.Fn eventfd
+system call fails if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Flags other than
+.Dv EFD_CLOEXEC ,
+.Dv EFD_NONBLOCK ,
+and
+.Dv EFD_SEMAPHORE
+are set in the
+.Fa flags
+argument.
+.It Bq Er EMFILE
+The per-process descriptor table is full.
+.It Bq Er ENFILE
+The system file table is full.
+.El
+.Pp
+The
+.Fn eventfd_read
+function fails if:
+.Bl -tag -width Er
+.It Bq Er EAGAIN
+The value of the
+.Nm
+object is
+.Dv 0
+and the
+.Nm
+object is set for non-blocking I/O.
+.El
+.Pp
+The
+.Fn eventfd_write
+function fails if:
+.Bl -tag -width Er
+.It Bq Er EAGAIN
+The resulting value of the
+.Nm
+object after adding the value
+.Fa val
+would exceed the maximum value
+.Po
+.Dv UINT64_MAX
+- 1
+.Pc
+and the
+.Nm
+object is set for non-blocking I/O.
+.It Bq Er EINVAL
+An attempt was made to write a value greater than the maximum value.
+.El
+.Pp
+In addition to the errors returned by
+.Fn eventfd_read
+and
+.Fn eventfd_write ,
+a read from or write to an
+.Nm
+object fails if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The size of the buffer is less than 8 bytes
+.Pq the size required to hold an unsigned 64-bit integer .
+.El
+.Sh SEE ALSO
+.Xr close 2 ,
+.Xr kevent 2 ,
+.Xr open 2 ,
+.Xr poll 2 ,
+.Xr read 2 ,
+.Xr select 2 ,
+.Xr write 2
+.Sh HISTORY
+The
+.Nm
+interface first appeared in
+.Nx 10 .
+It is compatible with the
+.Nm
+interface that appeared in Linux 2.6.30.
Index: src/lib/libc/sys/eventfd_read.c
diff -u /dev/null src/lib/libc/sys/eventfd_read.c:1.1
--- /dev/null Sun Sep 19 15:51:29 2021
+++ src/lib/libc/sys/eventfd_read.c Sun Sep 19 15:51:28 2021
@@ -0,0 +1,60 @@
+/* $NetBSD: eventfd_read.c,v 1.1 2021/09/19 15:51:28 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD: eventfd_read.c,v 1.1 2021/09/19 15:51:28 thorpej Exp $");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+#include <sys/types.h>
+#include <sys/eventfd.h>
+#include <errno.h>
+#include <unistd.h>
+
+int
+eventfd_read(int efd, eventfd_t *valp)
+{
+ eventfd_t val; /* staging buffer; *valp is written only on success */
+
+ switch (read(efd, &val, sizeof(val))) { /* dispatch on read(2) result */
+ case -1: /* read(2) failed: propagate, leaving its errno intact */
+ return -1;
+
+ case sizeof(val): /* a complete eventfd_t was transferred */
+ *valp = val;
+ return 0;
+
+ default:
+ /* Any other byte count (e.g. a short read). This should never happen, but... */
+ errno = EIO;
+ return -1;
+ }
+}
Index: src/lib/libc/sys/eventfd_write.c
diff -u /dev/null src/lib/libc/sys/eventfd_write.c:1.1
--- /dev/null Sun Sep 19 15:51:29 2021
+++ src/lib/libc/sys/eventfd_write.c Sun Sep 19 15:51:28 2021
@@ -0,0 +1,58 @@
+/* $NetBSD: eventfd_write.c,v 1.1 2021/09/19 15:51:28 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+__RCSID("$NetBSD: eventfd_write.c,v 1.1 2021/09/19 15:51:28 thorpej Exp $");
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+#include <sys/types.h>
+#include <sys/eventfd.h>
+#include <errno.h>
+#include <unistd.h>
+
+/*
+ * eventfd_write:
+ *
+ *	Write val to the descriptor efd using a single write(2) of
+ *	sizeof(eventfd_t) bytes.  Returns 0 on success.  Returns -1 if
+ *	write(2) fails (errno is whatever write(2) left), or -1 with
+ *	errno set to EIO if write(2) reported any other byte count.
+ */
+int
+eventfd_write(int efd, eventfd_t val)
+{
+	ssize_t const nwritten = write(efd, &val, sizeof(val));
+
+	if (nwritten == -1) {
+		return -1;
+	}
+	if (nwritten == (ssize_t)sizeof(val)) {
+		return 0;
+	}
+
+	/* This should never happen, but... */
+	errno = EIO;
+	return -1;
+}
Index: src/lib/libc/sys/timerfd.2
diff -u /dev/null src/lib/libc/sys/timerfd.2:1.1
--- /dev/null Sun Sep 19 15:51:29 2021
+++ src/lib/libc/sys/timerfd.2 Sun Sep 19 15:51:28 2021
@@ -0,0 +1,321 @@
+.\" $NetBSD: timerfd.2,v 1.1 2021/09/19 15:51:28 thorpej Exp $
+.\"
+.\" Copyright (c) 2021 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to The NetBSD Foundation
+.\" by Jason R. Thorpe.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd September 17, 2021
+.Dt TIMERFD 2
+.Os
+.Sh NAME
+.Nm timerfd ,
+.Nm timerfd_create ,
+.Nm timerfd_gettime ,
+.Nm timerfd_settime
+.Nd create and interact with a timer descriptor
+.Sh SYNOPSIS
+.In sys/timerfd.h
+.Ft int
+.Fn timerfd_create "clockid_t clockid" "int flags"
+.Ft int
+.Fn timerfd_gettime "int fd" "struct itimerspec *tim"
+.Ft int
+.Fn timerfd_settime "int fd" "int flags" \
+"const struct itimerspec *tim" "struct itimerspec *otim"
+.Sh DESCRIPTION
+.Nm
+presents an interface to interval timers associated with a file descriptor.
+These timers are functionally equivalent to per-process timers but are
+associated with a file descriptor, rather than a process.
+Because they are associated with a file descriptor, they may be passed
+to other processes, inherited across a fork, and multiplexed using
+.Xr kevent 2 ,
+.Xr poll 2 ,
+or
+.Xr select 2 .
+When a
+.Nm
+object is no longer needed, it may be disposed of using
+.Xr close 2 .
+.Pp
+The
+.Fn timerfd_create
+system call creates a
+.Nm
+object using the clock specified in the
+.Fa clockid
+argument.
+Valid values for
+.Fa clockid
+are
+.Dv CLOCK_REALTIME
+and
+.Dv CLOCK_MONOTONIC .
+The following flags define the behavior of the resulting object:
+.Bl -tag -width "TFD_NONBLOCK"
+.It Dv TFD_CLOEXEC
+This is an alias for the
+.Dv O_CLOEXEC
+flag; see
+.Xr open 2
+for more information.
+.It Dv TFD_NONBLOCK
+This is an alias for the
+.Dv O_NONBLOCK
+flag; see
+.Xr open 2
+for more information.
+.El
+.Pp
+Each time a
+.Nm
+timer expires, an internal counter is incremented.
+Reads from an
+.Nm
+object return the value of this counter in the caller's buffer as an
+unsigned 64-bit integer and reset the counter to
+.Dv 0 .
+If the value of the
+.Nm
+object's counter is
+.Dv 0 ,
+then reads will block, unless the
+.Nm
+object is set for non-blocking I/O.
+.Pp
+Writes to a
+.Nm
+object are not supported.
+.Pp
+The
+.Fn timerfd_settime
+system call sets the next expiration time of the
+.Nm
+object to the
+.Va it_value
+.Po
+see
+.Xr itimerspec 3
+.Pc
+specified in the
+.Fa tim
+argument.
+If the value is
+.Dv 0 ,
+the timer is disarmed.
+If the argument
+.Fa otim
+is not
+.Dv NULL
+the old timer settings are returned.
+The following flags may be specified to alter the behavior of the timer:
+.Bl -tag -width "TFD_TIMER_CANCEL_ON_SET"
+.It Dv TFD_TIMER_ABSTIME
+The specified timer value is an absolute time.
+This is equivalent to specifying
+.Dv TIMER_ABSTIME
+to
+.Xr timer_settime 2 .
+Otherwise, the time value is a relative time, equivalent to specifying
+.Dv TIMER_RELTIME
+to
+.Xr timer_settime 2 .
+.It Dv TFD_TIMER_CANCEL_ON_SET
+If the
+.Nm
+object's clock ID is
+.Dv CLOCK_REALTIME ,
+then the timer will be cancelled and its file descriptor will become
+immediately readable if the system realtime clock is set using
+.Xr clock_settime 2
+or
+.Xr settimeofday 2 .
+If the
+.Nm
+object's clock ID is not
+.Dv CLOCK_REALTIME
+this flag is ignored.
+.El
+.Pp
+If the
+.Va it_interval
+of the
+.Fa tim
+argument is non-zero, then the timer reloads upon expiration.
+.Pp
+The
+.Fn timerfd_gettime
+system call returns the current settings of the
+.Nm
+object in the
+.Fa tim
+argument.
+.Sh RETURN VALUES
+The
+.Fn timerfd_create
+system call returns
+.Dv -1
+if an error occurs, otherwise the return value is a descriptor representing the
+.Nm
+object.
+.Pp
+The
+.Fn timerfd_gettime
+and
+.Fn timerfd_settime
+system calls return
+.Dv 0
+upon success or
+.Dv -1
+if an error occurs.
+.Sh ERRORS
+The
+.Fn timerfd_create
+system call fails if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Flags other than
+.Dv TFD_CLOEXEC
+and
+.Dv TFD_NONBLOCK
+are set in the
+.Fa flags
+argument.
+.It Bq Er EINVAL
+The
+.Fa clockid
+argument was something other than
+.Dv CLOCK_REALTIME
+or
+.Dv CLOCK_MONOTONIC .
+.It Bq Er EMFILE
+The per-process descriptor table is full.
+.It Bq Er ENFILE
+The system file table is full.
+.El
+.Pp
+The
+.Fn timerfd_gettime
+system call fails if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The argument
+.Fa fd
+is not a valid file descriptor.
+.It Bq Er EINVAL
+The argument
+.Fa fd
+does not refer to a
+.Nm timerfd
+object.
+.It Bq Er EFAULT
+The
+.Fa tim
+argument points outside the allocated address space.
+.El
+.Pp
+The
+.Fn timerfd_settime
+system call fails if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The argument
+.Fa fd
+is not a valid file descriptor.
+.It Bq Er EINVAL
+The argument
+.Fa fd
+does not refer to a
+.Nm timerfd
+object.
+.It Bq Er EINVAL
+Bits other than the defined
+.Dv TFD_TIMER_ABSTIME
+and
+.Dv TFD_TIMER_CANCEL_ON_SET
+bits are set in the
+.Fa flags
+argument.
+.It Bq Er EINVAL
+A nanosecond field in the
+.Fa tim
+argument specified a value less than zero or greater than or equal to
+.Dv 1e9 .
+.It Bq Er EFAULT
+The
+.Fa tim
+or
+.Fa otim
+arguments point outside the allocated address space.
+.El
+.Pp
+A read from a
+.Nm
+object fails if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The size of the read buffer is less than 8 bytes
+.Pq the size required to hold an unsigned 64-bit integer .
+.It Bq Er EAGAIN
+The value of the
+.Nm
+object's expiration counter is
+.Dv 0
+and the
+.Nm
+object is set for non-blocking I/O.
+.It Bq Er ECANCELED
+The
+.Nm
+object was created with the clock ID
+.Dv CLOCK_REALTIME ,
+was configured with the
+.Dv TFD_TIMER_CANCEL_ON_SET
+flag, and the system realtime clock was changed with
+.Xr clock_settime 2
+or
+.Xr settimeofday 2 .
+.El
+.Sh SEE ALSO
+.Xr clock_settime 2 ,
+.Xr close 2 ,
+.Xr kevent 2 ,
+.Xr open 2 ,
+.Xr poll 2 ,
+.Xr read 2 ,
+.Xr select 2 ,
+.Xr settimeofday 2 ,
+.Xr timer_create 2 ,
+.Xr timer_gettime 2 ,
+.Xr timer_settime 2
+.Sh HISTORY
+The
+.Nm
+interface first appeared in
+.Nx 10 .
+It is compatible with the
+.Nm
+interface that appeared in Linux 2.6.25.
Index: src/sys/kern/sys_eventfd.c
diff -u /dev/null src/sys/kern/sys_eventfd.c:1.2
--- /dev/null Sun Sep 19 15:51:29 2021
+++ src/sys/kern/sys_eventfd.c Sun Sep 19 15:51:27 2021
@@ -0,0 +1,579 @@
+/* $NetBSD: sys_eventfd.c,v 1.2 2021/09/19 15:51:27 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: sys_eventfd.c,v 1.2 2021/09/19 15:51:27 thorpej Exp $");
+
+/*
+ * eventfd
+ *
+ * Eventfd objects present a simple counting object associated with a
+ * file descriptor. Writes and reads to this file descriptor increment
+ * and decrement the count, respectively. When the count is non-zero,
+ * the descriptor is considered "readable", and when less than the max
+ * value (EVENTFD_MAXVAL), is considered "writable".
+ *
+ * This implementation is API compatible with the Linux eventfd(2)
+ * interface.
+ */
+
+#include <sys/types.h>
+#include <sys/condvar.h>
+#include <sys/eventfd.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/kauth.h>
+#include <sys/mutex.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/select.h>
+#include <sys/stat.h>
+#include <sys/syscallargs.h>
+#include <sys/uio.h>
+
+struct eventfd { /* state of one eventfd object; allocated by eventfd_create() */
+ kmutex_t efd_lock; /* held around accesses to the fields below */
+ kcondvar_t efd_read_wait; /* blocked readers sleep here */
+ kcondvar_t efd_write_wait; /* blocked writers sleep here */
+ kcondvar_t efd_restart_wait; /* would-be sleepers wait here while efd_restarting */
+ struct selinfo efd_read_sel; /* poll/kqueue records waiting for readability */
+ struct selinfo efd_write_sel; /* poll/kqueue records waiting for writability */
+ eventfd_t efd_val; /* current counter value */
+ int64_t efd_nwaiters; /* LWPs currently asleep in eventfd_wait() */
+ bool efd_restarting; /* set by eventfd_fop_restart(); cleared by the last waiter out */
+ bool efd_has_read_waiters; /* set before a reader sleeps; cleared when readers are broadcast */
+ bool efd_has_write_waiters; /* set before a writer sleeps; cleared when writers are broadcast */
+ bool efd_is_semaphore; /* EFD_SEMAPHORE given: a read takes 1 rather than the whole count */
+
+ /*
+ * Information kept for stat(2).
+ */
+ struct timespec efd_btime; /* time created */
+ struct timespec efd_mtime; /* last write */
+ struct timespec efd_atime; /* last read */
+};
+
+#define EVENTFD_MAXVAL (UINT64_MAX - 1) /* largest value efd_val may hold; larger written values are rejected */
+
+/*
+ * eventfd_create:
+ *
+ *	Allocate (KM_SLEEP) and initialize an eventfd object whose
+ *	counter starts at val.  Of the bits in flags, only
+ *	EFD_SEMAPHORE is examined here.
+ */
+static struct eventfd *
+eventfd_create(unsigned int const val, int const flags)
+{
+	struct eventfd *efd;
+
+	efd = kmem_zalloc(sizeof(*efd), KM_SLEEP);
+
+	/* Lock, sleep queues, and poll/kqueue bookkeeping. */
+	mutex_init(&efd->efd_lock, MUTEX_DEFAULT, IPL_NONE);
+	cv_init(&efd->efd_read_wait, "efdread");
+	cv_init(&efd->efd_write_wait, "efdwrite");
+	cv_init(&efd->efd_restart_wait, "efdrstrt");
+	selinit(&efd->efd_read_sel);
+	selinit(&efd->efd_write_sel);
+
+	/* Counter state. */
+	efd->efd_val = val;
+	efd->efd_is_semaphore = (flags & EFD_SEMAPHORE) != 0;
+
+	/* Creation time, later reported through stat. */
+	getnanotime(&efd->efd_btime);
+
+	/* Caller deals with EFD_CLOEXEC and EFD_NONBLOCK. */
+
+	return efd;
+}
+
+/*
+ * eventfd_destroy:
+ *
+ *	Destroy an eventfd object and release its memory.
+ *
+ *	The object must be idle: no LWPs may be waiting on it and no
+ *	restart may be in progress (asserted below).  After this
+ *	returns, efd must not be referenced again.
+ */
+static void
+eventfd_destroy(struct eventfd * const efd)
+{
+
+	KASSERT(efd->efd_nwaiters == 0);
+	KASSERT(efd->efd_restarting == false);
+	KASSERT(efd->efd_has_read_waiters == false);
+	KASSERT(efd->efd_has_write_waiters == false);
+
+	cv_destroy(&efd->efd_read_wait);
+	cv_destroy(&efd->efd_write_wait);
+	cv_destroy(&efd->efd_restart_wait);
+
+	seldestroy(&efd->efd_read_sel);
+	seldestroy(&efd->efd_write_sel);
+
+	mutex_destroy(&efd->efd_lock);
+
+	/*
+	 * Pairs with the kmem_zalloc() in eventfd_create(); without
+	 * this, every closed eventfd leaks its structure.
+	 */
+	kmem_free(efd, sizeof(*efd));
+}
+
+/*
+ * eventfd_wait:
+ *
+ * Block on an eventfd. Handles non-blocking, as well as
+ * the restart cases.
+ *
+ * is_write selects the write-side condvar and waiter flag;
+ * otherwise the read side is used. efd_lock is handed to
+ * cv_wait()/cv_wait_sig(), so the caller must hold it.
+ *
+ * Returns EAGAIN at once if FNONBLOCK is set in fflag.
+ * Otherwise returns the cv_wait_sig() result, or ERESTART if
+ * that result was 0 and a restart is in progress on wakeup.
+ * A return of 0 only means the LWP was awakened; the callers
+ * in this file re-check their condition in a loop.
+ */
+static int
+eventfd_wait(struct eventfd * const efd, int const fflag, bool const is_write)
+{
+ kcondvar_t *waitcv;
+ int error;
+
+ if (fflag & FNONBLOCK) {
+ return EAGAIN;
+ }
+
+ /*
+ * We're going to block. If there is a restart in-progress,
+ * wait for that to complete first.
+ */
+ while (efd->efd_restarting) {
+ cv_wait(&efd->efd_restart_wait, &efd->efd_lock);
+ }
+
+ if (is_write) {
+ efd->efd_has_write_waiters = true;
+ waitcv = &efd->efd_write_wait;
+ } else {
+ efd->efd_has_read_waiters = true;
+ waitcv = &efd->efd_read_wait;
+ }
+
+ efd->efd_nwaiters++;
+ KASSERT(efd->efd_nwaiters > 0);
+ error = cv_wait_sig(waitcv, &efd->efd_lock);
+ efd->efd_nwaiters--;
+ KASSERT(efd->efd_nwaiters >= 0);
+
+ /*
+ * If a restart was triggered while we were asleep, we need
+ * to return ERESTART if no other error was returned. If we
+ * are the last waiter coming out of the restart drain, clear
+ * the condition.
+ */
+ if (efd->efd_restarting) {
+ if (error == 0) {
+ error = ERESTART;
+ }
+ if (efd->efd_nwaiters == 0) {
+ efd->efd_restarting = false;
+ cv_broadcast(&efd->efd_restart_wait);
+ }
+ }
+
+ return error;
+}
+
+/*
+ * eventfd_wake:
+ *
+ * Wake LWPs blocked on an eventfd.
+ *
+ * is_write names the operation that just completed. After a
+ * write, blocked readers are broadcast and POLLIN | POLLRDNORM
+ * is posted on the read selinfo. After a read, blocked writers
+ * are broadcast and POLLOUT | POLLWRNORM is posted on the write
+ * selinfo. The matching has-waiters flag is cleared whenever
+ * the broadcast is issued.
+ *
+ * selnotify() is passed NOTE_SUBMIT, and the knote filters in
+ * this file assert that efd_lock is owned in that case, so the
+ * caller must hold efd_lock.
+ */
+static void
+eventfd_wake(struct eventfd * const efd, bool const is_write)
+{
+ kcondvar_t *waitcv = NULL;
+ struct selinfo *sel;
+ int pollev;
+
+ if (is_write) {
+ if (efd->efd_has_read_waiters) {
+ waitcv = &efd->efd_read_wait;
+ efd->efd_has_read_waiters = false;
+ }
+ sel = &efd->efd_read_sel;
+ pollev = POLLIN | POLLRDNORM;
+ } else {
+ if (efd->efd_has_write_waiters) {
+ waitcv = &efd->efd_write_wait;
+ efd->efd_has_write_waiters = false;
+ }
+ sel = &efd->efd_write_sel;
+ pollev = POLLOUT | POLLWRNORM;
+ }
+ if (waitcv != NULL) {
+ cv_broadcast(waitcv);
+ }
+ selnotify(sel, pollev, NOTE_SUBMIT);
+}
+
+/*
+ * eventfd file operations
+ */
+
+/*
+ * eventfd_fop_read:
+ *
+ *	Read handler.  Fails with EINVAL if the caller's buffer cannot
+ *	hold an eventfd_t.  While the counter is zero, waits through
+ *	eventfd_wait() and returns its error if it fails.  A semaphore
+ *	eventfd yields 1 and decrements the counter; otherwise the whole
+ *	counter is yielded and reset to zero.  Blocked writers are woken
+ *	and the lock dropped before the value is copied out.
+ */
+static int
+eventfd_fop_read(file_t * const fp, off_t * const offset,
+    struct uio * const uio, kauth_cred_t const cred, int const flags)
+{
+	struct eventfd * const efd = fp->f_eventfd;
+	int const fflag = fp->f_flag;
+	eventfd_t taken;
+	int error;
+
+	if (uio->uio_resid < sizeof(eventfd_t)) {
+		return EINVAL;
+	}
+
+	mutex_enter(&efd->efd_lock);
+
+	for (;;) {
+		if (efd->efd_val != 0) {
+			break;
+		}
+		error = eventfd_wait(efd, fflag, false);
+		if (error != 0) {
+			mutex_exit(&efd->efd_lock);
+			return error;
+		}
+	}
+
+	/* Semaphore mode takes one unit; otherwise drain the counter. */
+	taken = efd->efd_is_semaphore ? 1 : efd->efd_val;
+	efd->efd_val -= taken;
+
+	getnanotime(&efd->efd_atime);
+	eventfd_wake(efd, false);
+
+	mutex_exit(&efd->efd_lock);
+
+	return uiomove(&taken, sizeof(taken), uio);
+}
+
+/*
+ * eventfd_fop_write:
+ *
+ *	Write handler.  Fails with EINVAL if fewer than
+ *	sizeof(eventfd_t) bytes are supplied or if the written value
+ *	exceeds EVENTFD_MAXVAL.  While adding the value would push the
+ *	counter past EVENTFD_MAXVAL, waits through eventfd_wait().  On
+ *	success the value is added to the counter and blocked readers
+ *	are woken.
+ */
+static int
+eventfd_fop_write(file_t * const fp, off_t * const offset,
+    struct uio * const uio, kauth_cred_t const cred, int const flags)
+{
+	struct eventfd * const efd = fp->f_eventfd;
+	int const fflag = fp->f_flag;
+	eventfd_t addend;
+	int error;
+
+	if (uio->uio_resid < sizeof(eventfd_t)) {
+		return EINVAL;
+	}
+
+	error = uiomove(&addend, sizeof(addend), uio);
+	if (error != 0) {
+		return error;
+	}
+
+	if (addend > EVENTFD_MAXVAL) {
+		error = EINVAL;
+	} else {
+		mutex_enter(&efd->efd_lock);
+
+		KASSERT(efd->efd_val <= EVENTFD_MAXVAL);
+		while (error == 0 &&
+		    (EVENTFD_MAXVAL - efd->efd_val) < addend) {
+			error = eventfd_wait(efd, fflag, true);
+		}
+
+		if (error == 0) {
+			efd->efd_val += addend;
+			KASSERT(efd->efd_val <= EVENTFD_MAXVAL);
+
+			getnanotime(&efd->efd_mtime);
+			eventfd_wake(efd, true);
+		}
+
+		mutex_exit(&efd->efd_lock);
+	}
+
+	if (error) {
+		/*
+		 * Undo the effect of uiomove() so that the error
+		 * gets reported correctly; see dofilewrite().
+		 */
+		uio->uio_resid += sizeof(addend);
+	}
+	return error;
+}
+
+/*
+ * eventfd_fop_poll:
+ *
+ *	Poll handler.  Reports the requested read events when the
+ *	counter is non-zero and the requested write events when the
+ *	counter is below EVENTFD_MAXVAL.  For each requested direction
+ *	that is not ready, the calling LWP is recorded on the matching
+ *	selinfo.
+ */
+static int
+eventfd_fop_poll(file_t * const fp, int const events)
+{
+	struct eventfd * const efd = fp->f_eventfd;
+	int const want_rd = events & (POLLIN | POLLRDNORM);
+	int const want_wr = events & (POLLOUT | POLLWRNORM);
+	int ready = 0;
+
+	/*
+	 * Note that Linux will return POLLERR if the eventfd count
+	 * overflows, but that is not possible in the normal read/write
+	 * API, only with Linux kernel-internal interfaces.  So, this
+	 * implementation never returns POLLERR.
+	 *
+	 * Also note that the Linux eventfd(2) man page does not
+	 * specifically discuss returning POLLRDNORM, but we check
+	 * for that event in addition to POLLIN.
+	 */
+
+	mutex_enter(&efd->efd_lock);
+
+	if (want_rd != 0) {
+		if (efd->efd_val == 0) {
+			selrecord(curlwp, &efd->efd_read_sel);
+		} else {
+			ready |= want_rd;
+		}
+	}
+
+	if (want_wr != 0) {
+		if (efd->efd_val >= EVENTFD_MAXVAL) {
+			selrecord(curlwp, &efd->efd_write_sel);
+		} else {
+			ready |= want_wr;
+		}
+	}
+
+	mutex_exit(&efd->efd_lock);
+
+	return ready;
+}
+
+/*
+ * eventfd_fop_stat:
+ *
+ *	Stat handler.  Zeroes *st and then fills it in under efd_lock:
+ *	st_size carries the current counter value, the birth and change
+ *	times are the creation time, and the access and modify times
+ *	are the last read and last write.  Always returns 0.
+ */
+static int
+eventfd_fop_stat(file_t * const fp, struct stat * const st)
+{
+	struct eventfd * const efd = fp->f_eventfd;
+
+	memset(st, 0, sizeof(*st));
+
+	mutex_enter(&efd->efd_lock);
+
+	/* Object shape. */
+	st->st_mode = S_IFIFO | S_IRUSR | S_IWUSR;
+	st->st_size = (off_t)efd->efd_val;
+	st->st_blksize = sizeof(eventfd_t);
+	st->st_blocks = 1;
+
+	/* Timestamps. */
+	st->st_ctimespec = efd->efd_btime;
+	st->st_birthtimespec = st->st_ctimespec;
+	st->st_atimespec = efd->efd_atime;
+	st->st_mtimespec = efd->efd_mtime;
+
+	/* Ownership comes from the credentials attached to the file. */
+	st->st_uid = kauth_cred_geteuid(fp->f_cred);
+	st->st_gid = kauth_cred_getegid(fp->f_cred);
+
+	mutex_exit(&efd->efd_lock);
+
+	return 0;
+}
+
+/*
+ * eventfd_fop_close:
+ *
+ *	Close handler.  Detaches the eventfd object from the file and
+ *	hands it to eventfd_destroy().  Always returns 0.
+ */
+static int
+eventfd_fop_close(file_t * const fp)
+{
+	struct eventfd * const dying = fp->f_eventfd;
+
+	/* Clear the file's reference before tearing the object down. */
+	fp->f_eventfd = NULL;
+	eventfd_destroy(dying);
+
+	return 0;
+}
+
+/*
+ * eventfd_filt_read_detach:
+ *
+ *	Remove a read knote from the eventfd's read selinfo, under
+ *	efd_lock.
+ */
+static void
+eventfd_filt_read_detach(struct knote * const kn)
+{
+	file_t * const fp = (file_t *)kn->kn_obj;
+	struct eventfd * const efd = fp->f_eventfd;
+
+	mutex_enter(&efd->efd_lock);
+	KASSERT(kn->kn_hook == efd);
+	selremove_knote(&efd->efd_read_sel, kn);
+	mutex_exit(&efd->efd_lock);
+}
+
+/*
+ * eventfd_filt_read:
+ *
+ *	Read filter event routine.  Stores the current counter value
+ *	in kn_data and returns non-zero when it is greater than zero.
+ *	With NOTE_SUBMIT in hint the caller already owns efd_lock
+ *	(asserted); otherwise the lock is taken around the sample.
+ */
+static int
+eventfd_filt_read(struct knote * const kn, long const hint)
+{
+	file_t * const fp = (file_t *)kn->kn_obj;
+	struct eventfd * const efd = fp->f_eventfd;
+	bool const caller_locked = (hint & NOTE_SUBMIT) != 0;
+
+	if (caller_locked) {
+		KASSERT(mutex_owned(&efd->efd_lock));
+	} else {
+		mutex_enter(&efd->efd_lock);
+	}
+
+	kn->kn_data = (int64_t)efd->efd_val;
+
+	if (!caller_locked) {
+		mutex_exit(&efd->efd_lock);
+	}
+
+	return (eventfd_t)kn->kn_data > 0;
+}
+
+static const struct filterops eventfd_read_filterops = { /* installed by eventfd_fop_kqfilter() for EVFILT_READ */
+ .f_isfd = 1,
+ .f_detach = eventfd_filt_read_detach, /* unhooks the knote from efd_read_sel */
+ .f_event = eventfd_filt_read, /* ready when the counter is non-zero */
+};
+
+/*
+ * eventfd_filt_write_detach:
+ *
+ *	Remove a write knote from the eventfd's write selinfo, under
+ *	efd_lock.
+ */
+static void
+eventfd_filt_write_detach(struct knote * const kn)
+{
+	file_t * const fp = (file_t *)kn->kn_obj;
+	struct eventfd * const efd = fp->f_eventfd;
+
+	mutex_enter(&efd->efd_lock);
+	KASSERT(kn->kn_hook == efd);
+	selremove_knote(&efd->efd_write_sel, kn);
+	mutex_exit(&efd->efd_lock);
+}
+
+/*
+ * eventfd_filt_write:
+ *
+ *	Write filter event routine.  Stores the current counter value
+ *	in kn_data and returns non-zero when it is below
+ *	EVENTFD_MAXVAL.  With NOTE_SUBMIT in hint the caller already
+ *	owns efd_lock (asserted); otherwise the lock is taken around
+ *	the sample.
+ */
+static int
+eventfd_filt_write(struct knote * const kn, long const hint)
+{
+	file_t * const fp = (file_t *)kn->kn_obj;
+	struct eventfd * const efd = fp->f_eventfd;
+	bool const caller_locked = (hint & NOTE_SUBMIT) != 0;
+
+	if (caller_locked) {
+		KASSERT(mutex_owned(&efd->efd_lock));
+	} else {
+		mutex_enter(&efd->efd_lock);
+	}
+
+	kn->kn_data = (int64_t)efd->efd_val;
+
+	if (!caller_locked) {
+		mutex_exit(&efd->efd_lock);
+	}
+
+	return (eventfd_t)kn->kn_data < EVENTFD_MAXVAL;
+}
+
+static const struct filterops eventfd_write_filterops = { /* installed by eventfd_fop_kqfilter() for EVFILT_WRITE */
+ .f_isfd = 1,
+ .f_detach = eventfd_filt_write_detach, /* unhooks the knote from efd_write_sel */
+ .f_event = eventfd_filt_write, /* ready when the counter is below EVENTFD_MAXVAL */
+};
+
+/*
+ * eventfd_fop_kqfilter:
+ *
+ *	Attach a knote to the eventfd.  EVFILT_READ and EVFILT_WRITE
+ *	are supported; any other filter fails with EINVAL.  On success
+ *	the knote's filter ops and hook are set and the knote is
+ *	recorded on the matching selinfo under efd_lock.
+ */
+static int
+eventfd_fop_kqfilter(file_t * const fp, struct knote * const kn)
+{
+	struct eventfd * const efd = ((file_t *)kn->kn_obj)->f_eventfd;
+	struct selinfo *target;
+
+	if (kn->kn_filter == EVFILT_READ) {
+		target = &efd->efd_read_sel;
+		kn->kn_fop = &eventfd_read_filterops;
+	} else if (kn->kn_filter == EVFILT_WRITE) {
+		target = &efd->efd_write_sel;
+		kn->kn_fop = &eventfd_write_filterops;
+	} else {
+		return EINVAL;
+	}
+
+	/* The detach routines assert that the hook is the eventfd. */
+	kn->kn_hook = efd;
+
+	mutex_enter(&efd->efd_lock);
+	selrecord_knote(target, kn);
+	mutex_exit(&efd->efd_lock);
+
+	return 0;
+}
+
+/*
+ * eventfd_fop_restart:
+ *
+ *	Restart handler.  If any LWPs are asleep in eventfd_wait(),
+ *	mark a restart in progress and broadcast both wait queues that
+ *	have waiters; eventfd_wait() then returns ERESTART to them.
+ *	Does nothing when there are no waiters.
+ */
+static void
+eventfd_fop_restart(file_t * const fp)
+{
+	struct eventfd * const efd = fp->f_eventfd;
+
+	/*
+	 * Unblock blocked reads/writes in order to allow close() to complete.
+	 * System calls return ERESTART so that the fd is revalidated.
+	 */
+
+	mutex_enter(&efd->efd_lock);
+
+	if (efd->efd_nwaiters == 0) {
+		/* Nobody is asleep; there is nothing to drain. */
+		mutex_exit(&efd->efd_lock);
+		return;
+	}
+
+	efd->efd_restarting = true;
+
+	if (efd->efd_has_read_waiters) {
+		cv_broadcast(&efd->efd_read_wait);
+		efd->efd_has_read_waiters = false;
+	}
+
+	if (efd->efd_has_write_waiters) {
+		cv_broadcast(&efd->efd_write_wait);
+		efd->efd_has_write_waiters = false;
+	}
+
+	mutex_exit(&efd->efd_lock);
+}
+
+static const struct fileops eventfd_fileops = { /* file operations installed on eventfd files by do_eventfd() */
+ .fo_name = "eventfd",
+ .fo_read = eventfd_fop_read,
+ .fo_write = eventfd_fop_write,
+ .fo_ioctl = fbadop_ioctl, /* no eventfd-specific ioctl handler is defined in this file */
+ .fo_fcntl = fnullop_fcntl,
+ .fo_poll = eventfd_fop_poll,
+ .fo_stat = eventfd_fop_stat,
+ .fo_close = eventfd_fop_close, /* destroys the eventfd object */
+ .fo_kqfilter = eventfd_fop_kqfilter,
+ .fo_restart = eventfd_fop_restart, /* kicks sleepers out with ERESTART */
+};
+
+/*
+ * eventfd(2) system call
+ */
+/*
+ * do_eventfd:
+ *
+ *	Create an eventfd with initial counter val and install it in
+ *	the descriptor table.  Fails with EINVAL if flags contains
+ *	bits other than EFD_CLOEXEC, EFD_NONBLOCK and EFD_SEMAPHORE, or
+ *	with the error from fd_allocfile().  On success the new
+ *	descriptor number is stored in *retval and 0 is returned.
+ */
+int
+do_eventfd(struct lwp * const l, unsigned int const val, int const flags,
+    register_t *retval)
+{
+	int const known_flags = EFD_CLOEXEC | EFD_NONBLOCK | EFD_SEMAPHORE;
+	file_t *fp;
+	int fd, error;
+
+	if ((flags & ~known_flags) != 0) {
+		return EINVAL;
+	}
+
+	error = fd_allocfile(&fp, &fd);
+	if (error != 0) {
+		return error;
+	}
+
+	/* Open for reading and writing; non-blocking only on request. */
+	fp->f_flag = FREAD | FWRITE |
+	    ((flags & EFD_NONBLOCK) ? FNONBLOCK : 0);
+	fp->f_type = DTYPE_EVENTFD;
+	fp->f_ops = &eventfd_fileops;
+	fp->f_eventfd = eventfd_create(val, flags);
+
+	fd_set_exclose(l, fd, (flags & EFD_CLOEXEC) != 0);
+	fd_affix(curproc, fp, fd);
+
+	*retval = fd;
+	return 0;
+}
+
+/*
+ * sys_eventfd:
+ *
+ *	System call entry point: unpack the arguments and forward
+ *	them to do_eventfd().
+ */
+int
+sys_eventfd(struct lwp *l, const struct sys_eventfd_args *uap,
+    register_t *retval)
+{
+	/* {
+		syscallarg(unsigned int) val;
+		syscallarg(int) flags;
+	} */
+	unsigned int const initial = SCARG(uap, val);
+	int const efd_flags = SCARG(uap, flags);
+
+	return do_eventfd(l, initial, efd_flags, retval);
+}
Index: src/sys/kern/sys_timerfd.c
diff -u /dev/null src/sys/kern/sys_timerfd.c:1.2
--- /dev/null Sun Sep 19 15:51:29 2021
+++ src/sys/kern/sys_timerfd.c Sun Sep 19 15:51:27 2021
@@ -0,0 +1,691 @@
+/* $NetBSD: sys_timerfd.c,v 1.2 2021/09/19 15:51:27 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: sys_timerfd.c,v 1.2 2021/09/19 15:51:27 thorpej Exp $");
+
+/*
+ * timerfd
+ *
+ * Timerfd objects are similar to POSIX timers, except they are associated
+ * with a file descriptor rather than a process. Timerfd objects are
+ * created with the timerfd_create(2) system call, similar to timer_create(2).
+ * The timerfd analogues for timer_gettime(2) and timer_settime(2) are
+ * timerfd_gettime(2) and timerfd_settime(2), respectively.
+ *
+ * When a timerfd object's timer fires, an internal counter is incremented.
+ * When this counter is non-zero, the descriptor associated with the timerfd
+ * object is "readable". Note that this is slightly different than the
+ * POSIX timer "overrun" counter, which only increments if the timer fires
+ * again while the notification signal is already pending. Thus, we are
+ * responsible for incrementing the "overrun" counter each time the timerfd
+ * timer fires.
+ *
+ * This implementation is API compatible with the Linux timerfd interface.
+ */
+
+#include <sys/types.h>
+#include <sys/condvar.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/kauth.h>
+#include <sys/mutex.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/select.h>
+#include <sys/stat.h>
+#include <sys/syscallargs.h>
+#include <sys/timerfd.h>
+#include <sys/uio.h>
+
+/* N.B. all timerfd state is protected by itimer_lock() */
+struct timerfd {
+ struct itimer tfd_itimer; /* underlying timer; its it_overruns doubles as the expiration count */
+ kcondvar_t tfd_read_wait; /* blocked readers sleep here */
+ kcondvar_t tfd_restart_wait; /* would-be sleepers wait here while tfd_restarting */
+ struct selinfo tfd_read_sel; /* poll/kqueue records waiting for readability */
+ int64_t tfd_nwaiters; /* LWPs currently asleep in timerfd_wait() */
+ bool tfd_cancel_on_set; /* if set, a realtime clock change cancels the timer */
+ bool tfd_cancelled; /* set by timerfd_realtime_changed(); reads then fail with ECANCELED */
+ bool tfd_restarting; /* restart drain in progress; see timerfd_wait() */
+
+ /*
+ * Information kept for stat(2).
+ */
+ struct timespec tfd_btime; /* time created */
+ struct timespec tfd_mtime; /* last timerfd_settime() */
+ struct timespec tfd_atime; /* last read */
+};
+
+static void timerfd_wake(struct timerfd *);
+
+/*
+ * timerfd_fire_count:
+ *
+ *	Return the timer's it_overruns value, converted through
+ *	unsigned int, as the 64-bit expiration count.
+ */
+static inline uint64_t
+timerfd_fire_count(const struct timerfd * const tfd)
+{
+	unsigned int const fired = (unsigned int)tfd->tfd_itimer.it_overruns;
+
+	return fired;
+}
+
+/*
+ * timerfd_is_readable:
+ *
+ *	A timerfd is readable when its expiration count is non-zero
+ *	or when it has been cancelled.
+ */
+static inline bool
+timerfd_is_readable(const struct timerfd * const tfd)
+{
+	if (tfd->tfd_itimer.it_overruns != 0) {
+		return true;
+	}
+	return tfd->tfd_cancelled;
+}
+
+/*
+ * timerfd_fire:
+ *
+ * Called when the timerfd's timer fires.
+ *
+ * Called from a callout with itimer lock held.
+ *
+ * Bumps it_overruns, which this file uses as the expiration
+ * count handed back by read, and then wakes sleeping readers
+ * and poll/kqueue waiters through timerfd_wake().
+ */
+static void
+timerfd_fire(struct itimer * const it)
+{
+ struct timerfd * const tfd =
+ container_of(it, struct timerfd, tfd_itimer);
+
+ it->it_overruns++;
+ timerfd_wake(tfd);
+}
+
+/*
+ * timerfd_realtime_changed:
+ *
+ * Called when CLOCK_REALTIME is changed with clock_settime()
+ * or settimeofday().
+ *
+ * Called with itimer lock held.
+ *
+ * Has an effect only when tfd_cancel_on_set is set: the timerfd
+ * is marked cancelled, which makes it readable and makes reads
+ * fail with ECANCELED, and waiters are woken.
+ */
+static void
+timerfd_realtime_changed(struct itimer * const it)
+{
+ struct timerfd * const tfd =
+ container_of(it, struct timerfd, tfd_itimer);
+
+ /* Should only be called when timer is armed. */
+ KASSERT(timespecisset(&it->it_time.it_value));
+
+ if (tfd->tfd_cancel_on_set) {
+ tfd->tfd_cancelled = true;
+ timerfd_wake(tfd);
+ }
+}
+
+static const struct itimer_ops timerfd_itimer_monotonic_ops = { /* used by timerfd_create() for clocks other than CLOCK_REALTIME */
+ .ito_fire = timerfd_fire,
+};
+
+static const struct itimer_ops timerfd_itimer_realtime_ops = { /* used by timerfd_create() for CLOCK_REALTIME */
+ .ito_fire = timerfd_fire,
+ .ito_realtime_changed = timerfd_realtime_changed, /* drives the cancel-on-set handling */
+};
+
+/*
+ * timerfd_create:
+ *
+ *	Allocate (KM_SLEEP) and initialize a timerfd object on the
+ *	given clock, which must be CLOCK_REALTIME or CLOCK_MONOTONIC
+ *	(asserted).  The realtime clock gets the itimer ops that
+ *	include the realtime-changed hook.  flags is not examined
+ *	here.
+ */
+static struct timerfd *
+timerfd_create(clockid_t const clock_id, int const flags)
+{
+	const struct itimer_ops *ops;
+	struct timerfd *tfd;
+
+	tfd = kmem_zalloc(sizeof(*tfd), KM_SLEEP);
+
+	KASSERT(clock_id == CLOCK_REALTIME || clock_id == CLOCK_MONOTONIC);
+
+	cv_init(&tfd->tfd_read_wait, "tfdread");
+	cv_init(&tfd->tfd_restart_wait, "tfdrstrt");
+	selinit(&tfd->tfd_read_sel);
+	getnanotime(&tfd->tfd_btime);
+
+	/* Caller deals with TFD_CLOEXEC and TFD_NONBLOCK. */
+
+	if (clock_id == CLOCK_REALTIME) {
+		ops = &timerfd_itimer_realtime_ops;
+	} else {
+		ops = &timerfd_itimer_monotonic_ops;
+	}
+
+	itimer_lock();
+	itimer_init(&tfd->tfd_itimer, ops, clock_id, NULL);
+	itimer_unlock();
+
+	return tfd;
+}
+
+/*
+ * timerfd_destroy:
+ *
+ * Destroy a timerfd object.
+ *
+ * The object must be idle: no LWPs waiting and no restart in
+ * progress (asserted). The timer is torn down first under the
+ * itimer lock, which itimer_fini() releases; the condvars and
+ * selinfo are destroyed after that and the memory obtained in
+ * timerfd_create() is freed last.
+ */
+static void
+timerfd_destroy(struct timerfd * const tfd)
+{
+
+ KASSERT(tfd->tfd_nwaiters == 0);
+ KASSERT(tfd->tfd_restarting == false);
+
+ itimer_lock();
+ itimer_poison(&tfd->tfd_itimer);
+ itimer_fini(&tfd->tfd_itimer); /* drops itimer lock */
+
+ cv_destroy(&tfd->tfd_read_wait);
+ cv_destroy(&tfd->tfd_restart_wait);
+
+ seldestroy(&tfd->tfd_read_sel);
+
+ kmem_free(tfd, sizeof(*tfd));
+}
+
+/*
+ * timerfd_wait:
+ *
+ * Block on a timerfd. Handles non-blocking, as well as
+ * the restart cases.
+ *
+ * itimer_mutex is handed to cv_wait()/cv_wait_sig(), so the
+ * caller must hold the itimer lock.
+ *
+ * Returns EAGAIN at once if FNONBLOCK is set in fflag.
+ * Otherwise returns the cv_wait_sig() result, or ERESTART if
+ * that result was 0 and a restart is in progress on wakeup.
+ * A return of 0 only means the LWP was awakened; the read
+ * handler re-checks readability in a loop.
+ */
+static int
+timerfd_wait(struct timerfd * const tfd, int const fflag)
+{
+ extern kmutex_t itimer_mutex; /* XXX */
+ int error;
+
+ if (fflag & FNONBLOCK) {
+ return EAGAIN;
+ }
+
+ /*
+ * We're going to block. If there is a restart in-progress,
+ * wait for that to complete first.
+ */
+ while (tfd->tfd_restarting) {
+ cv_wait(&tfd->tfd_restart_wait, &itimer_mutex);
+ }
+
+ tfd->tfd_nwaiters++;
+ KASSERT(tfd->tfd_nwaiters > 0);
+ error = cv_wait_sig(&tfd->tfd_read_wait, &itimer_mutex);
+ tfd->tfd_nwaiters--;
+ KASSERT(tfd->tfd_nwaiters >= 0);
+
+ /*
+ * If a restart was triggered while we were asleep, we need
+ * to return ERESTART if no other error was returned. If we
+ * are the last waiter coming out of the restart drain, clear
+ * the condition.
+ */
+ if (tfd->tfd_restarting) {
+ if (error == 0) {
+ error = ERESTART;
+ }
+ if (tfd->tfd_nwaiters == 0) {
+ tfd->tfd_restarting = false;
+ cv_broadcast(&tfd->tfd_restart_wait);
+ }
+ }
+
+ return error;
+}
+
+/*
+ * timerfd_wake:
+ *
+ * Wake LWPs blocked on a timerfd.
+ *
+ * Sleeping readers are broadcast only when the waiter count is
+ * non-zero; POLLIN | POLLRDNORM is always posted on the read
+ * selinfo. Both callers in this file run with the itimer lock
+ * held.
+ */
+static void
+timerfd_wake(struct timerfd * const tfd)
+{
+
+ if (tfd->tfd_nwaiters) {
+ cv_broadcast(&tfd->tfd_read_wait);
+ }
+ selnotify(&tfd->tfd_read_sel, POLLIN | POLLRDNORM, NOTE_SUBMIT);
+}
+
+/*
+ * timerfd file operations
+ */
+
+/*
+ * timerfd_fop_read:
+ *
+ *	Read handler.  Fails with EINVAL if the caller's buffer cannot
+ *	hold a uint64_t.  While the timerfd is not readable, waits
+ *	through timerfd_wait() and returns its error if it fails.
+ *	Fails with ECANCELED if the timerfd has been cancelled.
+ *	Otherwise the expiration count is taken, reset to zero, and
+ *	copied out after the itimer lock is dropped.
+ */
+static int
+timerfd_fop_read(file_t * const fp, off_t * const offset,
+    struct uio * const uio, kauth_cred_t const cred, int const flags)
+{
+	struct timerfd * const tfd = fp->f_timerfd;
+	int const fflag = fp->f_flag;
+	uint64_t nfired;
+	int error = 0;
+
+	if (uio->uio_resid < sizeof(uint64_t)) {
+		return EINVAL;
+	}
+
+	itimer_lock();
+
+	while (error == 0 && !timerfd_is_readable(tfd)) {
+		error = timerfd_wait(tfd, fflag);
+	}
+
+	/* Cancellation is reported only if the wait itself succeeded. */
+	if (error == 0 && tfd->tfd_cancelled) {
+		error = ECANCELED;
+	}
+
+	if (error != 0) {
+		itimer_unlock();
+		return error;
+	}
+
+	nfired = timerfd_fire_count(tfd);
+	tfd->tfd_itimer.it_overruns = 0;
+
+	getnanotime(&tfd->tfd_atime);
+
+	itimer_unlock();
+
+	return uiomove(&nfired, sizeof(nfired), uio);
+}
+
+/*
+ * timerfd_fop_ioctl:
+ *
+ *	ioctl file operation.  The only command handled here is
+ *	TFD_IOC_SET_TICKS, which replaces the expiration (overrun)
+ *	counter with the caller-supplied 64-bit value pointed to by
+ *	data.
+ *
+ *	Returns EINVAL if the value does not fit in an int (the type it
+ *	is stored as), EPASSTHROUGH for any other command, 0 on success.
+ */
+static int
+timerfd_fop_ioctl(file_t * const fp, unsigned long const cmd, void * const data)
+{
+	struct timerfd * const tfd = fp->f_timerfd;
+	int error = 0;
+
+	switch (cmd) {
+	case TFD_IOC_SET_TICKS: {
+		const uint64_t * const new_ticksp = data;
+		if (*new_ticksp > INT_MAX) {
+			return EINVAL;
+		}
+		itimer_lock();
+		tfd->tfd_itimer.it_overruns = (int)*new_ticksp;
+		/*
+		 * A non-zero count is something a reader can consume
+		 * right now, so notify sleepers and poll/kevent
+		 * listeners instead of leaving them asleep until the
+		 * timer next fires.  timerfd_wake() passes NOTE_SUBMIT,
+		 * which timerfd_filt_read() takes to mean the itimer
+		 * lock is already held -- as it is here.
+		 */
+		if (*new_ticksp != 0) {
+			timerfd_wake(tfd);
+		}
+		itimer_unlock();
+		break;
+	    }
+
+	default:
+		error = EPASSTHROUGH;
+		break;
+	}
+
+	return error;
+}
+
+/*
+ * timerfd_fop_poll:
+ *
+ *	poll file operation.  Requested write events are returned
+ *	unconditionally; read events are returned when the timerfd is
+ *	readable, otherwise the calling LWP is recorded on tfd_read_sel.
+ */
+static int
+timerfd_fop_poll(file_t * const fp, int const events)
+{
+	struct timerfd * const tfd = fp->f_timerfd;
+	int const want_read = events & (POLLIN | POLLRDNORM);
+	int revents;
+
+	/* Write events are handed straight back to the caller. */
+	revents = events & (POLLOUT | POLLWRNORM);
+
+	if (want_read == 0) {
+		return revents;
+	}
+
+	itimer_lock();
+	if (!timerfd_is_readable(tfd)) {
+		/* Nothing to read yet; arrange to be notified. */
+		selrecord(curlwp, &tfd->tfd_read_sel);
+	} else {
+		revents |= want_read;
+	}
+	itimer_unlock();
+
+	return revents;
+}
+
+/*
+ * timerfd_fop_stat:
+ *
+ *	stat file operation.  Presents the timerfd as a FIFO whose size
+ *	is the current fire count and whose timestamps come from the
+ *	timerfd's own atime/mtime/btime fields.  Always returns 0.
+ */
+static int
+timerfd_fop_stat(file_t * const fp, struct stat * const st)
+{
+	struct timerfd * const tfd = fp->f_timerfd;
+
+	memset(st, 0, sizeof(*st));
+
+	/* Fields filled in without taking the itimer lock. */
+	st->st_mode = S_IFIFO | S_IRUSR | S_IWUSR;
+	st->st_blksize = sizeof(uint64_t);
+	st->st_blocks = 1;
+	st->st_uid = kauth_cred_geteuid(fp->f_cred);
+	st->st_gid = kauth_cred_getegid(fp->f_cred);
+	st->st_birthtimespec = tfd->tfd_btime;
+
+	/* Snapshot the count and access/modify times under the lock. */
+	itimer_lock();
+	st->st_size = (off_t)timerfd_fire_count(tfd);
+	st->st_atimespec = tfd->tfd_atime;
+	st->st_mtimespec = tfd->tfd_mtime;
+	itimer_unlock();
+
+	/* The change time simply mirrors the modification time. */
+	st->st_ctimespec = st->st_mtimespec;
+
+	return 0;
+}
+
+/*
+ * timerfd_fop_close:
+ *
+ *	close file operation.  Detaches the timerfd from the file and
+ *	destroys it.  Always returns 0.
+ */
+static int
+timerfd_fop_close(file_t * const fp)
+{
+	struct timerfd * const doomed = fp->f_timerfd;
+
+	/* Clear the back-pointer before tearing the object down. */
+	fp->f_timerfd = NULL;
+	timerfd_destroy(doomed);
+
+	return 0;
+}
+
+/*
+ * timerfd_filt_read_detach:
+ *
+ *	f_detach hook for the read filter: removes the knote from the
+ *	timerfd's tfd_read_sel under the itimer lock.
+ */
+static void
+timerfd_filt_read_detach(struct knote * const kn)
+{
+	file_t * const fp = (file_t *)kn->kn_obj;
+	struct timerfd * const tfd = fp->f_timerfd;
+
+	itimer_lock();
+	/* kn_hook was pointed at the timerfd by timerfd_fop_kqfilter(). */
+	KASSERT(kn->kn_hook == tfd);
+	selremove_knote(&tfd->tfd_read_sel, kn);
+	itimer_unlock();
+}
+
+/*
+ * timerfd_filt_read:
+ *
+ *	f_event hook for the read filter.  Stores the current fire count
+ *	in kn_data and returns non-zero when that count is non-zero.
+ *
+ *	When the hint carries NOTE_SUBMIT the itimer lock is expected to
+ *	be held already (asserted); otherwise it is taken and released
+ *	here.
+ */
+static int
+timerfd_filt_read(struct knote * const kn, long const hint)
+{
+	file_t * const fp = (file_t *)kn->kn_obj;
+	struct timerfd * const tfd = fp->f_timerfd;
+	bool const locked_by_caller = (hint & NOTE_SUBMIT) != 0;
+
+	if (locked_by_caller) {
+		KASSERT(itimer_lock_held());
+	} else {
+		itimer_lock();
+	}
+
+	kn->kn_data = (int64_t)timerfd_fire_count(tfd);
+
+	if (!locked_by_caller) {
+		itimer_unlock();
+	}
+
+	return kn->kn_data != 0;
+}
+
+/*
+ * Filter operations for EVFILT_READ knotes on a timerfd. No f_attach
+ * hook is supplied here; timerfd_fop_kqfilter() installs this table in
+ * kn_fop and records the knote itself.
+ */
+static const struct filterops timerfd_read_filterops = {
+ .f_isfd = 1,
+ .f_detach = timerfd_filt_read_detach,
+ .f_event = timerfd_filt_read,
+};
+
+/*
+ * timerfd_fop_kqfilter:
+ *
+ *	kqfilter file operation.  Only EVFILT_READ is supported; any
+ *	other filter yields EINVAL.  On success the knote is given the
+ *	read filterops, its kn_hook is pointed at the timerfd, and it is
+ *	recorded on tfd_read_sel under the itimer lock.
+ */
+static int
+timerfd_fop_kqfilter(file_t * const fp, struct knote * const kn)
+{
+	struct timerfd * const tfd = ((file_t *)kn->kn_obj)->f_timerfd;
+
+	if (kn->kn_filter != EVFILT_READ) {
+		return EINVAL;
+	}
+
+	kn->kn_fop = &timerfd_read_filterops;
+	kn->kn_hook = tfd;
+
+	itimer_lock();
+	selrecord_knote(&tfd->tfd_read_sel, kn);
+	itimer_unlock();
+
+	return 0;
+}
+
+/*
+ * timerfd_fop_restart:
+ *
+ *	restart file operation.  Unblocks blocked reads in order to
+ *	allow close() to complete; the woken system calls return
+ *	ERESTART (see timerfd_wait()) so that the fd is revalidated.
+ */
+static void
+timerfd_fop_restart(file_t * const fp)
+{
+	struct timerfd * const tfd = fp->f_timerfd;
+
+	itimer_lock();
+
+	/* Only start a restart drain if somebody is actually asleep. */
+	if (tfd->tfd_nwaiters != 0) {
+		tfd->tfd_restarting = true;
+		cv_broadcast(&tfd->tfd_read_wait);
+	}
+
+	itimer_unlock();
+}
+
+/*
+ * File operations vector for timerfd descriptors. fo_write and
+ * fo_fcntl use the generic fbadop_write / fnullop_fcntl handlers
+ * rather than timerfd-specific ones. The address of this table is
+ * also what do_timerfd_gettime() and do_timerfd_settime() compare
+ * f_ops against to recognize a timerfd.
+ */
+static const struct fileops timerfd_fileops = {
+ .fo_name = "timerfd",
+ .fo_read = timerfd_fop_read,
+ .fo_write = fbadop_write,
+ .fo_ioctl = timerfd_fop_ioctl,
+ .fo_fcntl = fnullop_fcntl,
+ .fo_poll = timerfd_fop_poll,
+ .fo_stat = timerfd_fop_stat,
+ .fo_close = timerfd_fop_close,
+ .fo_kqfilter = timerfd_fop_kqfilter,
+ .fo_restart = timerfd_fop_restart,
+};
+
+/*
+ * timerfd_create(2) system call
+ *
+ *	do_timerfd_create() is the argument-unpacked worker.  It rejects
+ *	unknown flag bits and any clock other than CLOCK_REALTIME or
+ *	CLOCK_MONOTONIC with EINVAL, allocates a descriptor, attaches a
+ *	new timerfd to it and returns the descriptor number in *retval.
+ */
+int
+do_timerfd_create(struct lwp * const l, clockid_t const clock_id,
+    int const flags, register_t *retval)
+{
+	file_t *fp;
+	int fd, error;
+
+	if ((flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) != 0) {
+		return EINVAL;
+	}
+
+	/* Only these two clocks are allowed. */
+	if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_MONOTONIC) {
+		return EINVAL;
+	}
+
+	error = fd_allocfile(&fp, &fd);
+	if (error != 0) {
+		return error;
+	}
+
+	fp->f_flag = FREAD | ((flags & TFD_NONBLOCK) ? FNONBLOCK : 0);
+	fp->f_type = DTYPE_TIMERFD;
+	fp->f_ops = &timerfd_fileops;
+	fp->f_timerfd = timerfd_create(clock_id, flags);
+	fd_set_exclose(l, fd, (flags & TFD_CLOEXEC) != 0);
+	fd_affix(curproc, fp, fd);
+
+	*retval = fd;
+	return 0;
+}
+
+/*
+ * sys_timerfd_create:
+ *
+ *	System call entry point; unpacks the arguments and defers to
+ *	do_timerfd_create().
+ */
+int
+sys_timerfd_create(struct lwp *l, const struct sys_timerfd_create_args *uap,
+    register_t *retval)
+{
+	/* {
+		syscallarg(clockid_t) clock_id;
+		syscallarg(int) flags;
+	} */
+	clockid_t const clock_id = SCARG(uap, clock_id);
+	int const flags = SCARG(uap, flags);
+
+	return do_timerfd_create(l, clock_id, flags, retval);
+}
+
+/*
+ * timerfd_gettime(2) system call.
+ *
+ *	do_timerfd_gettime() stores the timer's current setting, as
+ *	reported by itimer_gettime(), in *curr_value (a kernel
+ *	buffer).  Returns EBADF if fd is not open and EINVAL if it
+ *	does not refer to a timerfd.
+ */
+int
+do_timerfd_gettime(struct lwp *l, int fd, struct itimerspec *curr_value,
+    register_t *retval)
+{
+	file_t * const fp = fd_getfile(fd);
+	int error;
+
+	if (fp == NULL) {
+		return EBADF;
+	}
+
+	if (fp->f_ops == &timerfd_fileops) {
+		struct timerfd * const tfd = fp->f_timerfd;
+
+		itimer_lock();
+		itimer_gettime(&tfd->tfd_itimer, curr_value);
+		itimer_unlock();
+		error = 0;
+	} else {
+		/* Some other kind of descriptor. */
+		error = EINVAL;
+	}
+
+	fd_putfile(fd);
+	return error;
+}
+
+/*
+ * sys_timerfd_gettime:
+ *
+ *	System call entry point; fetches the setting into a kernel
+ *	buffer via do_timerfd_gettime() and copies it out to the
+ *	user-supplied curr_value pointer.
+ */
+int
+sys_timerfd_gettime(struct lwp *l, const struct sys_timerfd_gettime_args *uap,
+    register_t *retval)
+{
+	/* {
+		syscallarg(int) fd;
+		syscallarg(struct itimerspec *) curr_value;
+	} */
+	struct itimerspec its;
+	int error;
+
+	error = do_timerfd_gettime(l, SCARG(uap, fd), &its, retval);
+	if (error != 0) {
+		return error;
+	}
+
+	return copyout(&its, SCARG(uap, curr_value), sizeof(its));
+}
+
+/*
+ * timerfd_timespec_valid:
+ *
+ *	Return true if ts is non-negative and normalized, i.e. tv_nsec
+ *	lies in [0, 1000000000).
+ */
+static bool
+timerfd_timespec_valid(const struct timespec * const ts)
+{
+
+	return ts->tv_sec >= 0 &&
+	    ts->tv_nsec >= 0 && ts->tv_nsec < 1000000000L;
+}
+
+/*
+ * timerfd_settime(2) system call.
+ *
+ *	do_timerfd_settime() arms (or, with a zero it_value, hands a
+ *	zero setting to itimer_settime() for) the timer behind fd.
+ *
+ *	=> flags: TFD_TIMER_ABSTIME and/or TFD_TIMER_CANCEL_ON_SET.
+ *	=> new_value: kernel copy of the requested setting.
+ *	=> old_value: if not NULL, receives the setting that was in
+ *	   effect on entry, in the same form do_timerfd_gettime()
+ *	   reports it (via itimer_gettime()).
+ *
+ *	Returns EINVAL for unknown flags, for a malformed new_value
+ *	or if fd is not a timerfd, and EBADF if fd is not open.  The
+ *	expiration counter is reset on success.
+ */
+int
+do_timerfd_settime(struct lwp *l, int fd, int flags,
+    const struct itimerspec *new_value, struct itimerspec *old_value,
+    register_t *retval)
+{
+	file_t *fp;
+	int error;
+
+	if (flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) {
+		return EINVAL;
+	}
+
+	/*
+	 * Reject negative or non-normalized values up front; the
+	 * timespec arithmetic below assumes well-formed operands.
+	 */
+	if (!timerfd_timespec_valid(&new_value->it_value) ||
+	    !timerfd_timespec_valid(&new_value->it_interval)) {
+		return EINVAL;
+	}
+
+	if ((fp = fd_getfile(fd)) == NULL) {
+		return EBADF;
+	}
+
+	if (fp->f_ops != &timerfd_fileops) {
+		fd_putfile(fd);
+		return EINVAL;
+	}
+
+	struct timerfd * const tfd = fp->f_timerfd;
+	struct itimer * const it = &tfd->tfd_itimer;
+
+	itimer_lock();
+
+	/*
+	 * Capture the previous setting exactly once, before it_time is
+	 * overwritten.  Doing this after the restart label would report
+	 * the value we just stored whenever itimer_settime() asks us to
+	 * start over.
+	 */
+	if (old_value != NULL) {
+		itimer_gettime(it, old_value);
+	}
+
+ restart:
+	it->it_time = *new_value;
+
+	/*
+	 * If we've been passed a relative value, convert it to an
+	 * absolute, as that's what the itimer facility expects for
+	 * non-virtual timers.  Also ensure that this doesn't set it
+	 * to zero or lets it go negative.
+	 * XXXJRT re-factor.
+	 */
+	if (timespecisset(&it->it_time.it_value) &&
+	    (flags & TFD_TIMER_ABSTIME) == 0) {
+		struct timespec now;
+		if (it->it_clockid == CLOCK_REALTIME) {
+			getnanotime(&now);
+		} else { /* CLOCK_MONOTONIC */
+			getnanouptime(&now);
+		}
+		timespecadd(&it->it_time.it_value, &now,
+		    &it->it_time.it_value);
+	}
+
+	error = itimer_settime(it);
+	if (error == ERESTART) {
+		/* Recompute the absolute time against a fresh "now". */
+		goto restart;
+	}
+	KASSERT(error == 0);
+
+	/* Reset the expirations counter. */
+	it->it_overruns = 0;
+
+	/* Cancel-on-set state is only maintained for CLOCK_REALTIME. */
+	if (it->it_clockid == CLOCK_REALTIME) {
+		tfd->tfd_cancelled = false;
+		tfd->tfd_cancel_on_set = !!(flags & TFD_TIMER_CANCEL_ON_SET);
+	}
+
+	getnanotime(&tfd->tfd_mtime);
+	itimer_unlock();
+
+	fd_putfile(fd);
+	return error;
+}
+
+/*
+ * sys_timerfd_settime:
+ *
+ *	System call entry point; copies new_value in from user space,
+ *	calls do_timerfd_settime(), and copies the previous setting out
+ *	when the caller supplied a non-NULL old_value pointer.
+ */
+int
+sys_timerfd_settime(struct lwp *l, const struct sys_timerfd_settime_args *uap,
+    register_t *retval)
+{
+	/* {
+		syscallarg(int) fd;
+		syscallarg(int) flags;
+		syscallarg(const struct itimerspec *) new_value;
+		syscallarg(struct itimerspec *) old_value;
+	} */
+	struct itimerspec new_its, old_its;
+	struct itimerspec *old_itsp;
+	int error;
+
+	error = copyin(SCARG(uap, new_value), &new_its, sizeof(new_its));
+	if (error != 0) {
+		return error;
+	}
+
+	/* Only ask for the old setting if there is somewhere to put it. */
+	old_itsp = (SCARG(uap, old_value) != NULL) ? &old_its : NULL;
+
+	error = do_timerfd_settime(l, SCARG(uap, fd), SCARG(uap, flags),
+	    &new_its, old_itsp, retval);
+	if (error != 0 || old_itsp == NULL) {
+		return error;
+	}
+
+	return copyout(old_itsp, SCARG(uap, old_value), sizeof(*old_itsp));
+}
Index: src/sys/sys/eventfd.h
diff -u /dev/null src/sys/sys/eventfd.h:1.2
--- /dev/null Sun Sep 19 15:51:29 2021
+++ src/sys/sys/eventfd.h Sun Sep 19 15:51:27 2021
@@ -0,0 +1,57 @@
+/* $NetBSD: eventfd.h,v 1.2 2021/09/19 15:51:27 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_EVENTFD_H_
+#define	_SYS_EVENTFD_H_
+
+#include <sys/cdefs.h>
+#include <sys/fcntl.h>
+
+/*
+ * Definitions for eventfd(2).  This implementation is API compatible
+ * with the Linux eventfd(2) interface.
+ */
+
+/* Type of the 64-bit counter value transferred by read and write. */
+typedef uint64_t eventfd_t;
+
+/*
+ * Flags for eventfd().  The values are borrowed from the open(2)
+ * flag namespace.
+ */
+#define	EFD_SEMAPHORE	O_RDWR		/* semaphore-style reads */
+#define	EFD_CLOEXEC	O_CLOEXEC	/* set close-on-exec */
+#define	EFD_NONBLOCK	O_NONBLOCK	/* non-blocking descriptor */
+
+#ifdef _KERNEL
+struct lwp;
+int	do_eventfd(struct lwp *, unsigned int, int, register_t *);
+#else /* ! _KERNEL */
+/* Give the userland prototypes C linkage for C++ consumers. */
+__BEGIN_DECLS
+int	eventfd(unsigned int, int);
+int	eventfd_read(int, eventfd_t *);
+int	eventfd_write(int, eventfd_t);
+__END_DECLS
+#endif /* _KERNEL */
+
+#endif /* _SYS_EVENTFD_H_ */
Index: src/sys/sys/timerfd.h
diff -u /dev/null src/sys/sys/timerfd.h:1.2
--- /dev/null Sun Sep 19 15:51:29 2021
+++ src/sys/sys/timerfd.h Sun Sep 19 15:51:27 2021
@@ -0,0 +1,65 @@
+/* $NetBSD: timerfd.h,v 1.2 2021/09/19 15:51:27 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_TIMERFD_H_
+#define	_SYS_TIMERFD_H_
+
+#include <sys/cdefs.h>
+#include <sys/fcntl.h>
+#include <sys/ioccom.h>
+#include <sys/time.h>
+
+/*
+ * Definitions for timerfd_create(2) / timerfd_gettime(2) / timerfd_settime(2).
+ * This implementation is API compatible with the Linux interface.
+ */
+
+/*
+ * Flags for timerfd_settime() (first two) and timerfd_create() (last
+ * two).  The values are borrowed from the open(2) flag namespace.
+ */
+#define	TFD_TIMER_ABSTIME	O_WRONLY
+#define	TFD_TIMER_CANCEL_ON_SET	O_RDWR
+#define	TFD_CLOEXEC		O_CLOEXEC
+#define	TFD_NONBLOCK		O_NONBLOCK
+
+/* ioctl: replace the expiration counter with the given 64-bit value. */
+#define	TFD_IOC_SET_TICKS	_IOW('T', 0, uint64_t)
+
+#ifdef _KERNEL
+struct lwp;
+int	do_timerfd_create(struct lwp *, clockid_t, int, register_t *);
+int	do_timerfd_gettime(struct lwp *, int, struct itimerspec *,
+	    register_t *);
+int	do_timerfd_settime(struct lwp *, int, int, const struct itimerspec *,
+	    struct itimerspec *, register_t *);
+#else /* ! _KERNEL */
+/* Give the userland prototypes C linkage for C++ consumers. */
+__BEGIN_DECLS
+int	timerfd_create(clockid_t, int);
+int	timerfd_gettime(int, struct itimerspec *);
+int	timerfd_settime(int, int, const struct itimerspec *,
+	    struct itimerspec *);
+__END_DECLS
+#endif /* _KERNEL */
+
+#endif /* _SYS_TIMERFD_H_ */
Index: src/tests/lib/libc/sys/t_eventfd.c
diff -u /dev/null src/tests/lib/libc/sys/t_eventfd.c:1.2
--- /dev/null Sun Sep 19 15:51:29 2021
+++ src/tests/lib/libc/sys/t_eventfd.c Sun Sep 19 15:51:28 2021
@@ -0,0 +1,790 @@
+/* $NetBSD: t_eventfd.c,v 1.2 2021/09/19 15:51:28 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__COPYRIGHT("@(#) Copyright (c) 2020\
+ The NetBSD Foundation, inc. All rights reserved.");
+__RCSID("$NetBSD: t_eventfd.c,v 1.2 2021/09/19 15:51:28 thorpej Exp $");
+
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/eventfd.h>
+#include <sys/select.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <errno.h>
+#include <poll.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <atf-c.h>
+
+/*
+ * State shared between a test's main thread and its helper thread.
+ */
+struct helper_context {
+ int efd; /* eventfd descriptor under test */
+
+ pthread_mutex_t mutex; /* protects state */
+ pthread_cond_t cond; /* signalled by set_state() */
+ pthread_barrier_t barrier; /* initialized for 2 participants */
+ int state; /* value exchanged via set/get/wait_state() */
+};
+
+/*
+ * Zero *ctx and initialize its mutex, condition variable and barrier.
+ * The condition variable is bound to CLOCK_MONOTONIC, matching the
+ * clock wait_state() uses to compute its deadline; the barrier is
+ * sized for two threads.
+ */
+static void
+init_helper_context(struct helper_context * const ctx)
+{
+ pthread_condattr_t condattr;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ ATF_REQUIRE(pthread_mutex_init(&ctx->mutex, NULL) == 0);
+
+ ATF_REQUIRE(pthread_condattr_init(&condattr) == 0);
+ ATF_REQUIRE(pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) == 0);
+ ATF_REQUIRE(pthread_cond_init(&ctx->cond, &condattr) == 0);
+ ATF_REQUIRE(pthread_condattr_destroy(&condattr) == 0);
+
+ ATF_REQUIRE(pthread_barrier_init(&ctx->barrier, NULL, 2) == 0);
+}
+
+/*
+ * Store a new value in ctx->state under the mutex and signal the
+ * condition variable so that a thread in wait_state() re-checks it.
+ */
+static void
+set_state(struct helper_context * const ctx, int const new)
+{
+
+	pthread_mutex_lock(&ctx->mutex);
+
+	ctx->state = new;
+	pthread_cond_signal(&ctx->cond);
+
+	pthread_mutex_unlock(&ctx->mutex);
+}
+
+/*
+ * Return a snapshot of ctx->state taken under the mutex.
+ */
+static int
+get_state(struct helper_context * const ctx)
+{
+	int snapshot;
+
+	pthread_mutex_lock(&ctx->mutex);
+	snapshot = ctx->state;
+	pthread_mutex_unlock(&ctx->mutex);
+
+	return snapshot;
+}
+
+/*
+ * Wait up to 5 seconds (measured on CLOCK_MONOTONIC) for ctx->state
+ * to become val.  Returns true if the state matches when we stop
+ * waiting, false otherwise.
+ */
+static bool
+wait_state(struct helper_context * const ctx, int const val)
+{
+	struct timespec deadline;
+	bool reached;
+
+	pthread_mutex_lock(&ctx->mutex);
+
+	ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &deadline) == 0);
+	deadline.tv_sec += 5;
+
+	/* Stop on a match, or as soon as the timed wait reports an error. */
+	while (ctx->state != val &&
+	    pthread_cond_timedwait(&ctx->cond, &ctx->mutex, &deadline) == 0) {
+		continue;
+	}
+	reached = (ctx->state == val);
+
+	pthread_mutex_unlock(&ctx->mutex);
+
+	return reached;
+}
+
+/*
+ * Rendezvous on the context's barrier.  Both 0 and
+ * PTHREAD_BARRIER_SERIAL_THREAD count as success.
+ */
+static bool
+wait_barrier(struct helper_context * const ctx)
+{
+	int const rc = pthread_barrier_wait(&ctx->barrier);
+
+	if (rc == PTHREAD_BARRIER_SERIAL_THREAD) {
+		return true;
+	}
+	return rc == 0;
+}
+
+/*****************************************************************************/
+
+/*
+ * Helper thread for the eventfd_normal test: performs two reads on
+ * ctx->efd and asserts the values 0xcafebabe and 0xbeefcafe,
+ * synchronizing with the main thread through the barrier and the
+ * shared state variable.
+ */
+static void *
+eventfd_normal_helper(void * const v)
+{
+ struct helper_context * const ctx = v;
+ eventfd_t efd_value;
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ /* Read the value. This will reset it to zero. */
+ ATF_REQUIRE(get_state(ctx) == 666);
+ ATF_REQUIRE(eventfd_read(ctx->efd, &efd_value) == 0);
+
+ /* Assert the value. */
+ ATF_REQUIRE(efd_value == 0xcafebabe);
+
+ /* Tell the main thread (waiting in wait_state()) the read is done. */
+ set_state(ctx, 0);
+
+ /* Wait for the main thread to prep the next test. */
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ /* Read the value. */
+ ATF_REQUIRE(eventfd_read(ctx->efd, &efd_value) == 0);
+
+ /* Assert the value. */
+ ATF_REQUIRE(efd_value == 0xbeefcafe);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ return NULL;
+}
+
+/*
+ * eventfd_normal: a blocking read on a zero-valued eventfd must sleep
+ * until a value is written, and successive writes must accumulate.
+ */
+ATF_TC(eventfd_normal);
+ATF_TC_HEAD(eventfd_normal, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "validates basic normal eventfd operation");
+}
+ATF_TC_BODY(eventfd_normal, tc)
+{
+ struct helper_context ctx;
+ pthread_t helper;
+ void *join_val;
+
+ init_helper_context(&ctx);
+
+ ATF_REQUIRE((ctx.efd = eventfd(0, 0)) >= 0);
+
+ ATF_REQUIRE(pthread_create(&helper, NULL,
+ eventfd_normal_helper, &ctx) == 0);
+
+ /*
+ * Wait for the helper to block in read(). Give it some time
+ * so that if the read fails or returns immediately, we'll
+ * notice.
+ */
+ set_state(&ctx, 666);
+ ATF_REQUIRE(wait_barrier(&ctx));
+ sleep(2);
+ ATF_REQUIRE(get_state(&ctx) == 666);
+
+ /* Write a distinct value; helper will assert it. */
+ ATF_REQUIRE(eventfd_write(ctx.efd, 0xcafebabe) == 0);
+
+ /* Wait for helper to read the value. */
+ ATF_REQUIRE(wait_state(&ctx, 0));
+
+ /* Helper is now blocked in a barrier. */
+
+ /* Test additive property of the efd value. */
+ ATF_REQUIRE(eventfd_write(ctx.efd, 0x0000cafe) == 0);
+ ATF_REQUIRE(eventfd_write(ctx.efd, 0xbeef0000) == 0);
+
+ /* Satisfy the barrier; helper will read value and assert 0xbeefcafe. */
+ ATF_REQUIRE(wait_barrier(&ctx));
+
+ /* And wait for it to finish. */
+ ATF_REQUIRE(wait_barrier(&ctx));
+
+ /* Reap the helper. */
+ ATF_REQUIRE(pthread_join(helper, &join_val) == 0);
+
+ (void) close(ctx.efd);
+}
+
+/*****************************************************************************/
+
+/*
+ * eventfd_semaphore: with EFD_SEMAPHORE each successful read returns 1,
+ * and with EFD_NONBLOCK a read that cannot be satisfied fails with
+ * EAGAIN instead of sleeping.
+ */
+ATF_TC(eventfd_semaphore);
+ATF_TC_HEAD(eventfd_semaphore, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "validates semaphore and non-blocking eventfd operation");
+}
+ATF_TC_BODY(eventfd_semaphore, tc)
+{
+ eventfd_t efd_value;
+ int efd;
+
+ ATF_REQUIRE((efd = eventfd(3, EFD_SEMAPHORE | EFD_NONBLOCK)) >= 0);
+
+ /* 3 reads should succeed without blocking. */
+ ATF_REQUIRE(eventfd_read(efd, &efd_value) == 0);
+ ATF_REQUIRE(efd_value == 1);
+
+ ATF_REQUIRE(eventfd_read(efd, &efd_value) == 0);
+ ATF_REQUIRE(efd_value == 1);
+
+ ATF_REQUIRE(eventfd_read(efd, &efd_value) == 0);
+ ATF_REQUIRE(efd_value == 1);
+
+ /*
+ * The initial count of 3 is used up. A blocking descriptor would
+ * sleep here; this one is non-blocking, so expect EAGAIN.
+ */
+ ATF_REQUIRE_ERRNO(EAGAIN,
+ eventfd_read(efd, &efd_value) == -1);
+
+ /* Add 1 to the semaphore. */
+ ATF_REQUIRE(eventfd_write(efd, 1) == 0);
+
+ /* One more read allowed. */
+ ATF_REQUIRE(eventfd_read(efd, &efd_value) == 0);
+ ATF_REQUIRE(efd_value == 1);
+
+ /* And this one again would block, so expect EAGAIN. */
+ ATF_REQUIRE_ERRNO(EAGAIN,
+ eventfd_read(efd, &efd_value) == -1);
+
+ (void) close(efd);
+}
+
+/*****************************************************************************/
+
+/*
+ * eventfd_select_poll_kevent_immed: checks what poll(), select() and
+ * kevent() report, all with zero timeouts, in two situations: a
+ * zero-valued eventfd (expected writable only) and one holding
+ * UINT64_MAX - 1 (expected readable only).
+ */
+ATF_TC(eventfd_select_poll_kevent_immed);
+ATF_TC_HEAD(eventfd_select_poll_kevent_immed, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "validates select/poll/kevent behavior - immediate return");
+}
+ATF_TC_BODY(eventfd_select_poll_kevent_immed, tc)
+{
+ const struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 };
+ struct timeval tv;
+ struct pollfd fds[1];
+ fd_set readfds, writefds, exceptfds;
+ int efd;
+ int kq;
+ struct kevent kev[2];
+
+ ATF_REQUIRE((efd = eventfd(0, EFD_NONBLOCK)) >= 0);
+
+ /* Register both a read and a write filter on the eventfd. */
+ ATF_REQUIRE((kq = kqueue()) >= 0);
+ EV_SET(&kev[0], efd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+ EV_SET(&kev[1], efd, EVFILT_WRITE, EV_ADD, 0, 0, NULL);
+ ATF_REQUIRE(kevent(kq, kev, 2, NULL, 0, &ts) == 0);
+
+ /*
+ * efd should be writable but not readable. Pass all of the
+ * event bits; we should only get back POLLOUT | POLLWRNORM.
+ */
+ fds[0].fd = efd;
+ fds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI |
+ POLLOUT | POLLWRNORM | POLLWRBAND | POLLHUP;
+ fds[0].revents = 0;
+ ATF_REQUIRE(poll(fds, 1, 0) == 1);
+ ATF_REQUIRE(fds[0].revents == (POLLOUT | POLLWRNORM));
+
+ /*
+ * As above; efd should only be set in writefds upon return
+ * from the select() call.
+ */
+ FD_ZERO(&readfds);
+ FD_ZERO(&writefds);
+ FD_ZERO(&exceptfds);
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ FD_SET(efd, &readfds);
+ FD_SET(efd, &writefds);
+ FD_SET(efd, &exceptfds);
+ ATF_REQUIRE(select(efd + 1, &readfds, &writefds, &exceptfds, &tv) == 1);
+ ATF_REQUIRE(!FD_ISSET(efd, &readfds));
+ ATF_REQUIRE(FD_ISSET(efd, &writefds));
+ ATF_REQUIRE(!FD_ISSET(efd, &exceptfds));
+
+ /*
+ * Check that we get an EVFILT_WRITE event (and only that event)
+ * on efd.
+ */
+ memset(kev, 0, sizeof(kev));
+ ATF_REQUIRE(kevent(kq, NULL, 0, kev, 2, &ts) == 1);
+ ATF_REQUIRE(kev[0].ident == (uintptr_t)efd);
+ ATF_REQUIRE(kev[0].filter == EVFILT_WRITE);
+ ATF_REQUIRE((kev[0].flags & (EV_EOF | EV_ERROR)) == 0);
+ ATF_REQUIRE(kev[0].data == 0);
+
+ /*
+ * Write the maximum value into the eventfd. This should result
+ * in the eventfd becoming readable but NOT writable.
+ */
+ ATF_REQUIRE(eventfd_write(efd, UINT64_MAX - 1) == 0);
+
+ fds[0].fd = efd;
+ fds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI |
+ POLLOUT | POLLWRNORM | POLLWRBAND | POLLHUP;
+ fds[0].revents = 0;
+ ATF_REQUIRE(poll(fds, 1, 0) == 1);
+ ATF_REQUIRE(fds[0].revents == (POLLIN | POLLRDNORM));
+
+ /* Same check with select(): only readfds should be set. */
+ FD_ZERO(&readfds);
+ FD_ZERO(&writefds);
+ FD_ZERO(&exceptfds);
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ FD_SET(efd, &readfds);
+ FD_SET(efd, &writefds);
+ FD_SET(efd, &exceptfds);
+ ATF_REQUIRE(select(efd + 1, &readfds, &writefds, &exceptfds, &tv) == 1);
+ ATF_REQUIRE(FD_ISSET(efd, &readfds));
+ ATF_REQUIRE(!FD_ISSET(efd, &writefds));
+ ATF_REQUIRE(!FD_ISSET(efd, &exceptfds));
+
+ /*
+ * Check that we get an EVFILT_READ event (and only that event)
+ * on efd.
+ */
+ memset(kev, 0, sizeof(kev));
+ ATF_REQUIRE(kevent(kq, NULL, 0, kev, 2, &ts) == 1);
+ ATF_REQUIRE(kev[0].ident == (uintptr_t)efd);
+ ATF_REQUIRE(kev[0].filter == EVFILT_READ);
+ ATF_REQUIRE((kev[0].flags & (EV_EOF | EV_ERROR)) == 0);
+ ATF_REQUIRE(kev[0].data == (int64_t)(UINT64_MAX - 1));
+
+ (void) close(kq);
+ (void) close(efd);
+}
+
+/*****************************************************************************/
+
+/*
+ * Helper thread for the eventfd_select_poll_kevent_block test. It runs
+ * six stages -- poll(), select() and kevent(), each first for
+ * readability and then for writability. Every stage confirms with a
+ * non-blocking read or write that the operation would currently fail
+ * with EAGAIN, meets the main thread at the barrier, waits in the
+ * multiplexing call, then sets the state to 0 and meets the main
+ * thread at the barrier again.
+ */
+static void *
+eventfd_select_poll_kevent_block_helper(void * const v)
+{
+ struct helper_context * const ctx = v;
+ struct pollfd fds[1];
+ fd_set selfds;
+ eventfd_t efd_value;
+ int kq;
+ struct kevent kev[1];
+
+ /* Stage 1: poll() for readability. */
+ fds[0].fd = ctx->efd;
+ fds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
+ fds[0].revents = 0;
+
+ ATF_REQUIRE_ERRNO(EAGAIN,
+ eventfd_read(ctx->efd, &efd_value) == -1);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ ATF_REQUIRE(get_state(ctx) == 666);
+ ATF_REQUIRE(poll(fds, 1, INFTIM) == 1);
+ ATF_REQUIRE(fds[0].revents == (POLLIN | POLLRDNORM));
+ set_state(ctx, 0);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ /*
+ * Stage 2: poll() for writability.
+ * The maximum value was written to the eventfd, so we
+ * should block waiting for writability.
+ */
+ fds[0].fd = ctx->efd;
+ fds[0].events = POLLOUT | POLLWRNORM;
+ fds[0].revents = 0;
+
+ ATF_REQUIRE_ERRNO(EAGAIN,
+ eventfd_write(ctx->efd, UINT64_MAX - 1) == -1);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ ATF_REQUIRE(get_state(ctx) == 666);
+ ATF_REQUIRE(poll(fds, 1, INFTIM) == 1);
+ ATF_REQUIRE(fds[0].revents == (POLLOUT | POLLWRNORM));
+ set_state(ctx, 0);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ /*
+ * Now, the same dance again, with select().
+ * Stage 3: select() for readability.
+ */
+
+ FD_ZERO(&selfds);
+ FD_SET(ctx->efd, &selfds);
+
+ ATF_REQUIRE_ERRNO(EAGAIN,
+ eventfd_read(ctx->efd, &efd_value) == -1);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ ATF_REQUIRE(get_state(ctx) == 666);
+ ATF_REQUIRE(select(ctx->efd + 1, &selfds, NULL, NULL, NULL) == 1);
+ ATF_REQUIRE(FD_ISSET(ctx->efd, &selfds));
+ set_state(ctx, 0);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ /* Stage 4: select() for writability. */
+ FD_ZERO(&selfds);
+ FD_SET(ctx->efd, &selfds);
+
+ ATF_REQUIRE_ERRNO(EAGAIN,
+ eventfd_write(ctx->efd, UINT64_MAX - 1) == -1);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ ATF_REQUIRE(get_state(ctx) == 666);
+ ATF_REQUIRE(select(ctx->efd + 1, NULL, &selfds, NULL, NULL) == 1);
+ ATF_REQUIRE(FD_ISSET(ctx->efd, &selfds));
+ set_state(ctx, 0);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ /*
+ * Now, the same dance again, with kevent().
+ * Stage 5: kevent() with a one-shot read filter.
+ */
+ ATF_REQUIRE((kq = kqueue()) >= 0);
+
+ EV_SET(&kev[0], ctx->efd, EVFILT_READ, EV_ADD | EV_ONESHOT, 0, 0, NULL);
+ ATF_REQUIRE(kevent(kq, kev, 1, NULL, 0, NULL) == 0);
+
+ ATF_REQUIRE_ERRNO(EAGAIN,
+ eventfd_read(ctx->efd, &efd_value) == -1);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ ATF_REQUIRE(get_state(ctx) == 666);
+ ATF_REQUIRE(kevent(kq, NULL, 0, kev, 1, NULL) == 1);
+ ATF_REQUIRE(kev[0].ident == (uintptr_t)ctx->efd);
+ ATF_REQUIRE(kev[0].filter == EVFILT_READ);
+ ATF_REQUIRE((kev[0].flags & (EV_EOF | EV_ERROR)) == 0);
+ ATF_REQUIRE(kev[0].data == (int64_t)(UINT64_MAX - 1));
+ set_state(ctx, 0);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ /* Stage 6: kevent() with a one-shot write filter. */
+ EV_SET(&kev[0], ctx->efd, EVFILT_WRITE, EV_ADD | EV_ONESHOT, 0, 0,
+ NULL);
+ ATF_REQUIRE(kevent(kq, kev, 1, NULL, 0, NULL) == 0);
+
+ ATF_REQUIRE_ERRNO(EAGAIN,
+ eventfd_write(ctx->efd, UINT64_MAX - 1) == -1);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ ATF_REQUIRE(get_state(ctx) == 666);
+ ATF_REQUIRE(kevent(kq, NULL, 0, kev, 1, NULL) == 1);
+ ATF_REQUIRE(kev[0].ident == (uintptr_t)ctx->efd);
+ ATF_REQUIRE(kev[0].filter == EVFILT_WRITE);
+ ATF_REQUIRE((kev[0].flags & (EV_EOF | EV_ERROR)) == 0);
+ ATF_REQUIRE(kev[0].data == 0);
+ set_state(ctx, 0);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ (void) close(kq);
+
+ return NULL;
+}
+
+/*
+ * eventfd_select_poll_kevent_block: drives the helper thread through
+ * its six stages. For each stage the main thread sets the state to
+ * 666, releases the helper via the barrier, sleeps to give the helper
+ * time to block, checks that the state is still 666 (i.e. the helper
+ * has not returned early), performs the write or read that should
+ * unblock the helper, and finally checks that the helper reset the
+ * state to 0.
+ */
+ATF_TC(eventfd_select_poll_kevent_block);
+ATF_TC_HEAD(eventfd_select_poll_kevent_block, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "validates select/poll/kevent behavior - return after blocking");
+}
+ATF_TC_BODY(eventfd_select_poll_kevent_block, tc)
+{
+ struct helper_context ctx;
+ pthread_t helper;
+ eventfd_t efd_value;
+ void *join_val;
+
+ init_helper_context(&ctx);
+
+ ATF_REQUIRE((ctx.efd = eventfd(0, EFD_NONBLOCK)) >= 0);
+
+ ATF_REQUIRE(pthread_create(&helper, NULL,
+ eventfd_select_poll_kevent_block_helper,
+ &ctx) == 0);
+
+ /*
+ * Wait for the helper to block in poll(). Give it some time
+ * so that if the poll returns immediately, we'll notice.
+ */
+ set_state(&ctx, 666);
+ ATF_REQUIRE(wait_barrier(&ctx));
+ sleep(2);
+ ATF_REQUIRE(get_state(&ctx) == 666);
+
+ /*
+ * Write the max value to the eventfd so that it becomes readable
+ * and unblocks the helper waiting in poll().
+ */
+ ATF_REQUIRE(eventfd_write(ctx.efd, UINT64_MAX - 1) == 0);
+
+ /*
+ * Ensure the helper woke from the poll() call.
+ */
+ ATF_REQUIRE(wait_barrier(&ctx));
+ ATF_REQUIRE(get_state(&ctx) == 0);
+
+ /*
+ * Wait for the helper to block in poll(), this time waiting
+ * for writability.
+ */
+ set_state(&ctx, 666);
+ ATF_REQUIRE(wait_barrier(&ctx));
+ sleep(2);
+ ATF_REQUIRE(get_state(&ctx) == 666);
+
+ /*
+ * Now read the value, which will reset the eventfd to 0 and
+ * unblock the poll() call.
+ */
+ ATF_REQUIRE(eventfd_read(ctx.efd, &efd_value) == 0);
+ ATF_REQUIRE(efd_value == UINT64_MAX - 1);
+
+ /*
+ * Ensure that the helper woke from the poll() call.
+ */
+ ATF_REQUIRE(wait_barrier(&ctx));
+ ATF_REQUIRE(get_state(&ctx) == 0);
+
+ /*
+ * Wait for the helper to block in select(), waiting for readability.
+ */
+ set_state(&ctx, 666);
+ ATF_REQUIRE(wait_barrier(&ctx));
+ sleep(2);
+ ATF_REQUIRE(get_state(&ctx) == 666);
+
+ /*
+ * Write the max value to the eventfd so that it becomes readable
+ * and unblocks the helper waiting in select().
+ *
+ * NOTE(review): the assignment to efd_value below is never read;
+ * the write passes the literal and the next eventfd_read()
+ * overwrites the variable.
+ */
+ efd_value = UINT64_MAX - 1;
+ ATF_REQUIRE(eventfd_write(ctx.efd, UINT64_MAX - 1) == 0);
+
+ /*
+ * Ensure the helper woke from the select() call.
+ */
+ ATF_REQUIRE(wait_barrier(&ctx));
+ ATF_REQUIRE(get_state(&ctx) == 0);
+
+ /*
+ * Wait for the helper to block in select(), this time waiting
+ * for writability.
+ */
+ set_state(&ctx, 666);
+ ATF_REQUIRE(wait_barrier(&ctx));
+ sleep(2);
+ ATF_REQUIRE(get_state(&ctx) == 666);
+
+ /*
+ * Now read the value, which will reset the eventfd to 0 and
+ * unblock the select() call.
+ */
+ ATF_REQUIRE(eventfd_read(ctx.efd, &efd_value) == 0);
+ ATF_REQUIRE(efd_value == UINT64_MAX - 1);
+
+ /*
+ * Ensure that the helper woke from the select() call.
+ */
+ ATF_REQUIRE(wait_barrier(&ctx));
+ ATF_REQUIRE(get_state(&ctx) == 0);
+
+ /*
+ * Wait for the helper to block in kevent(), waiting for readability.
+ */
+ set_state(&ctx, 666);
+ ATF_REQUIRE(wait_barrier(&ctx));
+ sleep(2);
+ ATF_REQUIRE(get_state(&ctx) == 666);
+
+ /*
+ * Write the max value to the eventfd so that it becomes readable
+ * and unblocks the helper waiting in kevent().
+ *
+ * NOTE(review): as in the select() stage, the assignment to
+ * efd_value below is never read.
+ */
+ efd_value = UINT64_MAX - 1;
+ ATF_REQUIRE(eventfd_write(ctx.efd, UINT64_MAX - 1) == 0);
+
+ /*
+ * Ensure the helper woke from the kevent() call.
+ */
+ ATF_REQUIRE(wait_barrier(&ctx));
+ ATF_REQUIRE(get_state(&ctx) == 0);
+
+ /*
+ * Wait for the helper to block in kevent(), this time waiting
+ * for writability.
+ */
+ set_state(&ctx, 666);
+ ATF_REQUIRE(wait_barrier(&ctx));
+ sleep(2);
+ ATF_REQUIRE(get_state(&ctx) == 666);
+
+ /*
+ * Now read the value, which will reset the eventfd to 0 and
+ * unblock the kevent() call.
+ */
+ ATF_REQUIRE(eventfd_read(ctx.efd, &efd_value) == 0);
+ ATF_REQUIRE(efd_value == UINT64_MAX - 1);
+
+ /*
+ * Ensure that the helper woke from the kevent() call.
+ */
+ ATF_REQUIRE(wait_barrier(&ctx));
+ ATF_REQUIRE(get_state(&ctx) == 0);
+
+ /* Reap the helper. */
+ ATF_REQUIRE(pthread_join(helper, &join_val) == 0);
+
+ (void) close(ctx.efd);
+}
+
+/*****************************************************************************/
+
+/*
+ * Helper thread for the eventfd_restart test: consumes the initial
+ * count, then blocks in a second read that is expected to fail with
+ * EBADF once the main thread closes the descriptor.
+ */
+static void *
+eventfd_restart_helper(void * const v)
+{
+ struct helper_context * const ctx = v;
+ eventfd_t efd_value;
+
+ /*
+ * Issue a single read to ensure that the descriptor is valid.
+ * This will not block because it was created with an initial
+ * count of 1.
+ */
+ ATF_REQUIRE(eventfd_read(ctx->efd, &efd_value) == 0);
+ ATF_REQUIRE(efd_value == 1);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ /*
+ * Block in read. The main thread will close the descriptor,
+ * which should unblock us and result in EBADF.
+ */
+ ATF_REQUIRE(get_state(ctx) == 666);
+ ATF_REQUIRE_ERRNO(EBADF, eventfd_read(ctx->efd, &efd_value) == -1);
+ set_state(ctx, 0);
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ return NULL;
+}
+
+/*
+ * eventfd_restart: closing an eventfd while another thread is blocked
+ * reading it must wake that thread, whose read then fails with EBADF.
+ */
+ATF_TC(eventfd_restart);
+ATF_TC_HEAD(eventfd_restart, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "exercises the 'restart' fileop code path");
+}
+ATF_TC_BODY(eventfd_restart, tc)
+{
+ struct helper_context ctx;
+ pthread_t helper;
+ void *join_val;
+
+ init_helper_context(&ctx);
+
+ ATF_REQUIRE((ctx.efd = eventfd(1, 0)) >= 0);
+
+ ATF_REQUIRE(pthread_create(&helper, NULL,
+ eventfd_restart_helper, &ctx) == 0);
+
+ /*
+ * Wait for the helper to block in read(). Give it some time
+ * so that if the read returns immediately, we'll notice.
+ */
+ set_state(&ctx, 666);
+ ATF_REQUIRE(wait_barrier(&ctx));
+ sleep(2);
+ ATF_REQUIRE(get_state(&ctx) == 666);
+
+ /*
+ * Close the descriptor. This should unblock the reader,
+ * and cause it to receive EBADF.
+ */
+ ATF_REQUIRE(close(ctx.efd) == 0);
+
+ /*
+ * Ensure that the helper woke from the read() call.
+ */
+ ATF_REQUIRE(wait_barrier(&ctx));
+ ATF_REQUIRE(get_state(&ctx) == 0);
+
+ /* Reap the helper. */
+ ATF_REQUIRE(pthread_join(helper, &join_val) == 0);
+}
+
+/*****************************************************************************/
+
+/*
+ * eventfd_badflags: passing every flag bit except the three defined
+ * EFD_* flags must make eventfd() fail with EINVAL.
+ */
+ATF_TC(eventfd_badflags);
+ATF_TC_HEAD(eventfd_badflags, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "validates behavior when eventfd() called with bad flags");
+}
+ATF_TC_BODY(eventfd_badflags, tc)
+{
+ ATF_REQUIRE_ERRNO(EINVAL,
+ eventfd(0, ~(EFD_SEMAPHORE | EFD_CLOEXEC | EFD_NONBLOCK)) == -1);
+}
+
+/*****************************************************************************/
+
+/*
+ * eventfd_bufsize: read() and write() with a buffer one byte short of
+ * an eventfd_t must fail with EINVAL, while a buffer of two eventfd_t
+ * must transfer exactly one eventfd_t and leave the second element
+ * untouched.
+ */
+ATF_TC(eventfd_bufsize);
+ATF_TC_HEAD(eventfd_bufsize, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "validates expected buffer size behavior");
+}
+ATF_TC_BODY(eventfd_bufsize, tc)
+{
+ eventfd_t efd_value[2];
+ int efd;
+
+ ATF_REQUIRE((efd = eventfd(1, EFD_NONBLOCK)) >= 0);
+
+ /* Too-short read. */
+ ATF_REQUIRE_ERRNO(EINVAL,
+ read(efd, efd_value, sizeof(efd_value[0]) - 1) == -1);
+
+ /* Oversized read: the sentinel in element 1 must survive. */
+ efd_value[0] = 0xdeadbeef;
+ efd_value[1] = 0xdeadbeef;
+ ATF_REQUIRE(read(efd, efd_value, sizeof(efd_value)) ==
+ sizeof(efd_value[0]));
+ ATF_REQUIRE(efd_value[0] == 1);
+ ATF_REQUIRE(efd_value[1] == 0xdeadbeef);
+
+ /* Too-short write, then an oversized one (efd_value[0] is 1 here). */
+ ATF_REQUIRE_ERRNO(EINVAL,
+ write(efd, efd_value, sizeof(efd_value[0]) - 1) == -1);
+ ATF_REQUIRE(write(efd, efd_value, sizeof(efd_value)) ==
+ sizeof(efd_value[0]));
+
+ /* Only the first element of the oversized write was consumed. */
+ ATF_REQUIRE(read(efd, efd_value, sizeof(efd_value)) ==
+ sizeof(efd_value[0]));
+ ATF_REQUIRE(efd_value[0] == 1);
+ ATF_REQUIRE(efd_value[1] == 0xdeadbeef);
+
+ (void) close(efd);
+}
+
+/*****************************************************************************/
+
+/*
+ * Test program entry point: registers every eventfd test case defined
+ * in this file with the ATF test program.
+ */
+ATF_TP_ADD_TCS(tp)
+{
+ ATF_TP_ADD_TC(tp, eventfd_normal);
+ ATF_TP_ADD_TC(tp, eventfd_semaphore);
+ ATF_TP_ADD_TC(tp, eventfd_badflags);
+ ATF_TP_ADD_TC(tp, eventfd_bufsize);
+ ATF_TP_ADD_TC(tp, eventfd_select_poll_kevent_immed);
+ ATF_TP_ADD_TC(tp, eventfd_select_poll_kevent_block);
+ ATF_TP_ADD_TC(tp, eventfd_restart);
+
+ return atf_no_error();
+}
Index: src/tests/lib/libc/sys/t_timerfd.c
diff -u /dev/null src/tests/lib/libc/sys/t_timerfd.c:1.2
--- /dev/null Sun Sep 19 15:51:29 2021
+++ src/tests/lib/libc/sys/t_timerfd.c Sun Sep 19 15:51:28 2021
@@ -0,0 +1,602 @@
+/* $NetBSD: t_timerfd.c,v 1.2 2021/09/19 15:51:28 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__COPYRIGHT("@(#) Copyright (c) 2020\
+ The NetBSD Foundation, inc. All rights reserved.");
+__RCSID("$NetBSD: t_timerfd.c,v 1.2 2021/09/19 15:51:28 thorpej Exp $");
+
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/select.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/timerfd.h>
+#include <errno.h>
+#include <poll.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <atf-c.h>
+
+/*
+ * State shared between a test's main thread and the helper thread it
+ * spawns.
+ */
+struct helper_context {
+ /*
+ * Descriptor handed to the helper. Only timerfd_restart uses it:
+ * the main thread stores its timerfd here and the helper closes it.
+ */
+ int fd;
+
+ /*
+ * Rendezvous point for the two threads; set up for a count of 2
+ * by init_helper_context() and waited on via wait_barrier().
+ */
+ pthread_barrier_t barrier;
+};
+
+/*
+ * Prepare a helper_context for use: zero the whole structure, then
+ * initialise its barrier for two participants (the main thread and one
+ * helper thread). Failure to initialise the barrier fails the running
+ * test case via ATF_REQUIRE.
+ */
+static void
+init_helper_context(struct helper_context * const ctx)
+{
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ ATF_REQUIRE(pthread_barrier_init(&ctx->barrier, NULL, 2) == 0);
+}
+
+/*
+ * Block on the context's barrier until the other participant arrives.
+ *
+ * pthread_barrier_wait() reports success in two ways: 0 for ordinary
+ * waiters and PTHREAD_BARRIER_SERIAL_THREAD for the one thread chosen
+ * as the "serial" thread. Both count as success here; any other return
+ * value yields false.
+ */
+static bool
+wait_barrier(struct helper_context * const ctx)
+{
+	const int status = pthread_barrier_wait(&ctx->barrier);
+
+	if (status == PTHREAD_BARRIER_SERIAL_THREAD)
+		return true;
+
+	return status == 0;
+}
+
+/*****************************************************************************/
+
+/*
+ * Read one 64-bit value from 'fd' and store it in *valp.
+ *
+ * Returns 0 on success. Returns -1 if read(2) fails (errno is left as
+ * set by read(2)) or if read(2) transfers anything other than exactly
+ * sizeof(uint64_t) bytes, in which case errno is set to EIO. *valp is
+ * written only on success.
+ */
+static int
+timerfd_read(int fd, uint64_t *valp)
+{
+	uint64_t count;
+	const ssize_t nread = read(fd, &count, sizeof(count));
+
+	if (nread == -1)
+		return -1;
+
+	if (nread != (ssize_t)sizeof(count)) {
+		/* Short transfer; should never happen. */
+		errno = EIO;
+		return -1;
+	}
+
+	*valp = count;
+	return 0;
+}
+
+/*****************************************************************************/
+
+/*
+ * timerfd_create: argument validation of timerfd_create().
+ *
+ * CLOCK_REALTIME and CLOCK_MONOTONIC must be accepted. CLOCK_VIRTUAL
+ * and CLOCK_PROF must be refused with EINVAL, as must a flags word
+ * containing any bit other than TFD_CLOEXEC and TFD_NONBLOCK.
+ */
+ATF_TC(timerfd_create);
+ATF_TC_HEAD(timerfd_create, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "validates timerfd_create()");
+}
+ATF_TC_BODY(timerfd_create, tc)
+{
+ int fd;
+
+ /* Supported clocks. */
+ ATF_REQUIRE((fd = timerfd_create(CLOCK_REALTIME, 0)) >= 0);
+ (void) close(fd);
+
+ ATF_REQUIRE((fd = timerfd_create(CLOCK_MONOTONIC, 0)) >= 0);
+ (void) close(fd);
+
+ /* Clocks that must be rejected. */
+ ATF_REQUIRE_ERRNO(EINVAL,
+ (fd = timerfd_create(CLOCK_VIRTUAL, 0)) == -1);
+
+ ATF_REQUIRE_ERRNO(EINVAL,
+ (fd = timerfd_create(CLOCK_PROF, 0)) == -1);
+
+ /* Valid clock, but every flag bit outside the known set is on. */
+ ATF_REQUIRE_ERRNO(EINVAL,
+ (fd = timerfd_create(CLOCK_REALTIME,
+ ~(TFD_CLOEXEC | TFD_NONBLOCK))) == -1);
+}
+
+/*****************************************************************************/
+
+/*
+ * timerfd_bogusfd: timerfd_gettime() and timerfd_settime() must fail
+ * with EINVAL when handed a descriptor that is open but is not a
+ * timerfd. A kqueue descriptor serves as the stand-in.
+ */
+ATF_TC(timerfd_bogusfd);
+ATF_TC_HEAD(timerfd_bogusfd, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "validates rejection of bogus fds by timerfd_{get,set}time()");
+}
+ATF_TC_BODY(timerfd_bogusfd, tc)
+{
+ struct itimerspec its = { 0 };
+ int fd;
+
+ ATF_REQUIRE((fd = kqueue()) >= 0); /* arbitrary fd type */
+
+ ATF_REQUIRE_ERRNO(EINVAL,
+ timerfd_gettime(fd, &its) == -1);
+
+ /* Use a non-zero expiration (5 seconds) for the settime attempt. */
+ its.it_value.tv_sec = 5;
+ ATF_REQUIRE_ERRNO(EINVAL,
+ timerfd_settime(fd, 0, &its, NULL) == -1);
+
+ (void) close(fd);
+}
+
+/*****************************************************************************/
+
+/*
+ * timerfd_block: read(2) on a timerfd created without TFD_NONBLOCK
+ * waits for the timer.
+ *
+ * A one-shot timer with an it_value of 1 second is armed on a
+ * CLOCK_MONOTONIC timerfd. The read must succeed and report exactly
+ * one expiration, and the whole-second part of the time elapsed
+ * around settime+read must be exactly 1.
+ */
+ATF_TC(timerfd_block);
+ATF_TC_HEAD(timerfd_block, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "validates blocking behavior");
+}
+ATF_TC_BODY(timerfd_block, tc)
+{
+ struct timespec then, now, delta;
+ uint64_t val;
+ int fd;
+
+ ATF_REQUIRE((fd = timerfd_create(CLOCK_MONOTONIC, 0)) >= 0);
+
+ /* Zero it_interval: the timer does not repeat. */
+ const struct itimerspec its = {
+ .it_value = { .tv_sec = 1, .tv_nsec = 0 },
+ .it_interval = { .tv_sec = 0, .tv_nsec = 0 },
+ };
+
+ /* Timestamps bracket the arm + read sequence. */
+ ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &then) == 0);
+ ATF_REQUIRE(timerfd_settime(fd, 0, &its, NULL) == 0);
+ ATF_REQUIRE(timerfd_read(fd, &val) == 0);
+ ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+ ATF_REQUIRE(val == 1);
+
+ /* delta = now - then; only its seconds field is checked. */
+ timespecsub(&now, &then, &delta);
+ ATF_REQUIRE(delta.tv_sec == 1);
+
+ (void) close(fd);
+}
+
+/*****************************************************************************/
+
+/*
+ * timerfd_repeating: a periodic timer accumulates expirations.
+ *
+ * The timer first fires after 200ms and then every 200ms. After a
+ * 1-second sleep, a single non-blocking read must report between 3
+ * and 5 expirations, and the whole-second part of the time elapsed
+ * around settime+sleep must be exactly 1.
+ */
+ATF_TC(timerfd_repeating);
+ATF_TC_HEAD(timerfd_repeating, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "validates repeating timer behavior");
+}
+ATF_TC_BODY(timerfd_repeating, tc)
+{
+ struct timespec then, now, delta;
+ uint64_t val;
+ int fd;
+
+ ATF_REQUIRE((fd = timerfd_create(CLOCK_MONOTONIC,
+ TFD_NONBLOCK)) >= 0);
+
+ /* 200,000,000 ns = 200ms for both first expiry and period. */
+ const struct itimerspec its = {
+ .it_value = { .tv_sec = 0, .tv_nsec = 200000000 },
+ .it_interval = { .tv_sec = 0, .tv_nsec = 200000000 },
+ };
+
+ ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &then) == 0);
+ ATF_REQUIRE(timerfd_settime(fd, 0, &its, NULL) == 0);
+ /* sleep() returning non-zero (interrupted early) fails the test. */
+ ATF_REQUIRE(sleep(1) == 0);
+ ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+ ATF_REQUIRE(timerfd_read(fd, &val) == 0);
+ ATF_REQUIRE(val >= 3 && val <= 5); /* allow some slop */
+
+ /* delta = now - then; only its seconds field is checked. */
+ timespecsub(&now, &then, &delta);
+ ATF_REQUIRE(delta.tv_sec == 1);
+
+ (void) close(fd);
+}
+
+/*****************************************************************************/
+
+/*
+ * timerfd_abstime: arming a timer with TFD_TIMER_ABSTIME.
+ *
+ * The expiration time is the current CLOCK_MONOTONIC time plus one
+ * second, passed as an absolute value. The blocking read must report
+ * exactly one expiration, and the whole-second part of the elapsed
+ * time must be exactly 1.
+ */
+ATF_TC(timerfd_abstime);
+ATF_TC_HEAD(timerfd_abstime, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "validates specifying abstime");
+}
+ATF_TC_BODY(timerfd_abstime, tc)
+{
+ struct timespec then, now, delta;
+ uint64_t val;
+ int fd;
+
+ ATF_REQUIRE((fd = timerfd_create(CLOCK_MONOTONIC, 0)) >= 0);
+
+ /* Not const: it_value is filled in below from the sampled time. */
+ struct itimerspec its = {
+ .it_value = { .tv_sec = 0, .tv_nsec = 0 },
+ .it_interval = { .tv_sec = 0, .tv_nsec = 0 },
+ };
+
+ ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &then) == 0);
+ /* Absolute deadline: the sampled time plus one second. */
+ its.it_value = then;
+ its.it_value.tv_sec += 1;
+ ATF_REQUIRE(timerfd_settime(fd, TFD_TIMER_ABSTIME, &its, NULL) == 0);
+ ATF_REQUIRE(timerfd_read(fd, &val) == 0);
+ ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+ ATF_REQUIRE(val == 1);
+
+ /* delta = now - then; only its seconds field is checked. */
+ timespecsub(&now, &then, &delta);
+ ATF_REQUIRE(delta.tv_sec == 1);
+
+ (void) close(fd);
+}
+
+/*****************************************************************************/
+
+/*
+ * timerfd_cancel_on_set_immed: TFD_TIMER_CANCEL_ON_SET when the clock
+ * is set before anyone reads the descriptor.
+ *
+ * A one-hour timer is armed on a CLOCK_REALTIME timerfd with
+ * TFD_TIMER_CANCEL_ON_SET, and CLOCK_REALTIME is then set (to the value
+ * sampled just before arming). The following read must fail with
+ * ECANCELED. The test is marked as requiring root because it calls
+ * clock_settime().
+ */
+ATF_TC(timerfd_cancel_on_set_immed);
+ATF_TC_HEAD(timerfd_cancel_on_set_immed, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "validates cancel-on-set - immediate");
+ atf_tc_set_md_var(tc, "require.user", "root");
+}
+ATF_TC_BODY(timerfd_cancel_on_set_immed, tc)
+{
+ struct timespec now;
+ uint64_t val;
+ int fd;
+
+ ATF_REQUIRE((fd = timerfd_create(CLOCK_REALTIME, 0)) >= 0);
+
+ /* 60 * 60 seconds: far enough out that it cannot fire on its own. */
+ const struct itimerspec its = {
+ .it_value = { .tv_sec = 60 * 60, .tv_nsec = 0 },
+ .it_interval = { .tv_sec = 0, .tv_nsec = 0 },
+ };
+
+ ATF_REQUIRE(clock_gettime(CLOCK_REALTIME, &now) == 0);
+ ATF_REQUIRE(timerfd_settime(fd, TFD_TIMER_CANCEL_ON_SET,
+ &its, NULL) == 0);
+ /* Set the clock to the value read above; the act of setting matters. */
+ ATF_REQUIRE(clock_settime(CLOCK_REALTIME, &now) == 0);
+ ATF_REQUIRE_ERRNO(ECANCELED, timerfd_read(fd, &val) == -1);
+
+ (void) close(fd);
+}
+
+/*****************************************************************************/
+
+/*
+ * Helper thread for timerfd_cancel_on_set_block.
+ *
+ * 'v' is the shared struct helper_context. After meeting the main
+ * thread at the barrier, the helper sleeps 2 seconds and then sets
+ * CLOCK_REALTIME to the value it has just read from it. Always returns
+ * NULL.
+ */
+static void *
+timerfd_cancel_on_set_block_helper(void * const v)
+{
+ struct helper_context * const ctx = v;
+ struct timespec now;
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ /* Delay before touching the clock, then set it to "now". */
+ ATF_REQUIRE(sleep(2) == 0);
+ ATF_REQUIRE(clock_gettime(CLOCK_REALTIME, &now) == 0);
+ ATF_REQUIRE(clock_settime(CLOCK_REALTIME, &now) == 0);
+
+ return NULL;
+}
+
+/*
+ * timerfd_cancel_on_set_block: TFD_TIMER_CANCEL_ON_SET when the clock
+ * is set by another thread.
+ *
+ * The main thread arms a one-hour CLOCK_REALTIME timer with
+ * TFD_TIMER_CANCEL_ON_SET, starts the helper, meets it at the barrier
+ * and then calls read on the (blocking) timerfd. The helper sets the
+ * clock 2 seconds after the barrier; the read must fail with
+ * ECANCELED. The test is marked as requiring root because the helper
+ * calls clock_settime().
+ */
+ATF_TC(timerfd_cancel_on_set_block);
+ATF_TC_HEAD(timerfd_cancel_on_set_block, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "validates cancel-on-set - blocking");
+ atf_tc_set_md_var(tc, "require.user", "root");
+}
+ATF_TC_BODY(timerfd_cancel_on_set_block, tc)
+{
+ struct helper_context ctx;
+ pthread_t helper;
+ void *join_val;
+ uint64_t val;
+ int fd;
+
+ ATF_REQUIRE((fd = timerfd_create(CLOCK_REALTIME, 0)) >= 0);
+
+ /* 60 * 60 seconds: far enough out that it cannot fire on its own. */
+ const struct itimerspec its = {
+ .it_value = { .tv_sec = 60 * 60, .tv_nsec = 0 },
+ .it_interval = { .tv_sec = 0, .tv_nsec = 0 },
+ };
+
+ /* The helper only uses the barrier; ctx.fd stays zeroed. */
+ init_helper_context(&ctx);
+
+ ATF_REQUIRE(timerfd_settime(fd, TFD_TIMER_CANCEL_ON_SET,
+ &its, NULL) == 0);
+ ATF_REQUIRE(pthread_create(&helper, NULL,
+ timerfd_cancel_on_set_block_helper, &ctx) == 0);
+ ATF_REQUIRE(wait_barrier(&ctx));
+ ATF_REQUIRE_ERRNO(ECANCELED, timerfd_read(fd, &val) == -1);
+
+ /* Reap the helper; its return value is not examined. */
+ ATF_REQUIRE(pthread_join(helper, &join_val) == 0);
+
+ (void) close(fd);
+}
+
+/*****************************************************************************/
+
+/*
+ * timerfd_select_poll_kevent_immed: readiness reporting by poll(2),
+ * select(2) and kevent(2) when none of them has to wait.
+ *
+ * Phase 1 (timer not armed): poll and select, both with a zero timeout,
+ * must report the descriptor as writable only.
+ * Phase 2 (one-shot 500ms timer armed, then a 2-second sleep): fstat()
+ * must show st_size == 1, poll and select must report the descriptor as
+ * both readable and writable, kevent must deliver one EVFILT_READ event
+ * with data == 1, one read must return 1, and a second read must fail
+ * with EAGAIN.
+ */
+ATF_TC(timerfd_select_poll_kevent_immed);
+ATF_TC_HEAD(timerfd_select_poll_kevent_immed, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "validates select/poll/kevent behavior - immediate return");
+}
+ATF_TC_BODY(timerfd_select_poll_kevent_immed, tc)
+{
+ /* Zero timeout: every kevent() call below returns without waiting. */
+ const struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 };
+ struct itimerspec its;
+ struct timeval tv;
+ struct stat st;
+ struct pollfd fds[1];
+ uint64_t val;
+ fd_set readfds, writefds, exceptfds;
+ int fd;
+ int kq;
+ struct kevent kev[1];
+
+ ATF_REQUIRE((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK)) >= 0);
+
+ /* Register interest in EVFILT_READ on the timerfd up front. */
+ ATF_REQUIRE((kq = kqueue()) >= 0);
+ EV_SET(&kev[0], fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+ ATF_REQUIRE(kevent(kq, kev, 1, NULL, 0, &ts) == 0);
+
+ /*
+ * fd should be writable but not readable. Pass all of the
+ * event bits; we should only get back POLLOUT | POLLWRNORM.
+ * (It's writable only in so far as we'll get an error if we try.)
+ */
+ fds[0].fd = fd;
+ fds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI |
+ POLLOUT | POLLWRNORM | POLLWRBAND | POLLHUP;
+ fds[0].revents = 0;
+ ATF_REQUIRE(poll(fds, 1, 0) == 1);
+ ATF_REQUIRE(fds[0].revents == (POLLOUT | POLLWRNORM));
+
+ /*
+ * As above; fd should only be set in writefds upon return
+ * from the select() call.
+ */
+ FD_ZERO(&readfds);
+ FD_ZERO(&writefds);
+ FD_ZERO(&exceptfds);
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ FD_SET(fd, &readfds);
+ FD_SET(fd, &writefds);
+ FD_SET(fd, &exceptfds);
+ ATF_REQUIRE(select(fd + 1, &readfds, &writefds, &exceptfds, &tv) == 1);
+ ATF_REQUIRE(!FD_ISSET(fd, &readfds));
+ ATF_REQUIRE(FD_ISSET(fd, &writefds));
+ ATF_REQUIRE(!FD_ISSET(fd, &exceptfds));
+
+ /*
+ * Now set a one-shot half-second timer, wait for it to expire, and
+ * then check again.
+ */
+ memset(&its, 0, sizeof(its));
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 500000000;
+ ATF_REQUIRE(timerfd_settime(fd, 0, &its, NULL) == 0);
+ ATF_REQUIRE(sleep(2) == 0);
+
+ /* Verify it actually fired via the stat() back-channel. */
+ ATF_REQUIRE(fstat(fd, &st) == 0);
+ ATF_REQUIRE(st.st_size == 1);
+
+ /* Same poll request as before; now readable as well as writable. */
+ fds[0].fd = fd;
+ fds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI |
+ POLLOUT | POLLWRNORM | POLLWRBAND | POLLHUP;
+ fds[0].revents = 0;
+ ATF_REQUIRE(poll(fds, 1, 0) == 1);
+ ATF_REQUIRE(fds[0].revents == (POLLIN | POLLRDNORM |
+ POLLOUT | POLLWRNORM));
+
+ /*
+ * Same select request as before. The expected return value is 2
+ * because fd is now reported in both readfds and writefds.
+ */
+ FD_ZERO(&readfds);
+ FD_ZERO(&writefds);
+ FD_ZERO(&exceptfds);
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ FD_SET(fd, &readfds);
+ FD_SET(fd, &writefds);
+ FD_SET(fd, &exceptfds);
+ ATF_REQUIRE(select(fd + 1, &readfds, &writefds, &exceptfds, &tv) == 2);
+ ATF_REQUIRE(FD_ISSET(fd, &readfds));
+ ATF_REQUIRE(FD_ISSET(fd, &writefds));
+ ATF_REQUIRE(!FD_ISSET(fd, &exceptfds));
+
+ /*
+ * Check that we get an EVFILT_READ event on fd.
+ */
+ memset(kev, 0, sizeof(kev));
+ ATF_REQUIRE(kevent(kq, NULL, 0, kev, 1, &ts) == 1);
+ ATF_REQUIRE(kev[0].ident == (uintptr_t)fd);
+ ATF_REQUIRE(kev[0].filter == EVFILT_READ);
+ ATF_REQUIRE((kev[0].flags & (EV_EOF | EV_ERROR)) == 0);
+ ATF_REQUIRE(kev[0].data == 1);
+
+ /*
+ * Read the timerfd to ensure we get the correct number of
+ * expirations.
+ */
+ ATF_REQUIRE(timerfd_read(fd, &val) == 0);
+ ATF_REQUIRE(val == 1);
+
+ /* And ensure that we would block if we tried again. */
+ ATF_REQUIRE_ERRNO(EAGAIN, timerfd_read(fd, &val) == -1);
+
+ (void) close(kq);
+ (void) close(fd);
+}
+
+/*****************************************************************************/
+
+/*
+ * timerfd_select_poll_kevent_block: a timerfd wakes up a caller that is
+ * blocked in poll(2), select(2) or kevent(2).
+ *
+ * Each of the three sections arms the same 1-second one-shot timer and
+ * then makes the multiplexing call with no timeout. The call must
+ * report exactly one ready descriptor/event, the readiness must be
+ * "readable", and at least one second must have gone by. The
+ * descriptor is never read; each section simply re-arms the timer.
+ */
+ATF_TC(timerfd_select_poll_kevent_block);
+ATF_TC_HEAD(timerfd_select_poll_kevent_block, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "validates select/poll/kevent behavior - blocking");
+}
+ATF_TC_BODY(timerfd_select_poll_kevent_block, tc)
+{
+ /* Zero timeout, used only for the kevent() registration call. */
+ const struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 };
+ struct timespec then, now;
+ struct pollfd fds[1];
+ fd_set readfds;
+ int fd;
+ int kq;
+ struct kevent kev[1];
+
+ ATF_REQUIRE((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK)) >= 0);
+
+ /* Register interest in EVFILT_READ on the timerfd up front. */
+ ATF_REQUIRE((kq = kqueue()) >= 0);
+ EV_SET(&kev[0], fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+ ATF_REQUIRE(kevent(kq, kev, 1, NULL, 0, &ts) == 0);
+
+ /*
+ * For each of these tests, we do the following:
+ *
+ * - Get the current time.
+ * - Set a 1-second one-shot timer.
+ * - Block in the multiplexing call.
+ * - Get the current time and verify that the timer expiration
+ * interval has passed.
+ *
+ * The elapsed-time check compares only the tv_sec fields of the
+ * two samples.
+ */
+
+ const struct itimerspec its = {
+ .it_value = { .tv_sec = 1, .tv_nsec = 0 },
+ .it_interval = { .tv_sec = 0, .tv_nsec = 0 },
+ };
+
+ /* poll(2) */
+ fds[0].fd = fd;
+ fds[0].events = POLLIN | POLLRDNORM;
+ fds[0].revents = 0;
+
+ ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &then) == 0);
+ ATF_REQUIRE(timerfd_settime(fd, 0, &its, NULL) == 0);
+ ATF_REQUIRE(poll(fds, 1, INFTIM) == 1);
+ ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+ ATF_REQUIRE(fds[0].revents == (POLLIN | POLLRDNORM));
+ ATF_REQUIRE(now.tv_sec - then.tv_sec >= 1);
+
+ /* select(2) */
+ FD_ZERO(&readfds);
+ FD_SET(fd, &readfds);
+
+ ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &then) == 0);
+ ATF_REQUIRE(timerfd_settime(fd, 0, &its, NULL) == 0);
+ ATF_REQUIRE(select(fd + 1, &readfds, NULL, NULL, NULL) == 1);
+ ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+ ATF_REQUIRE(FD_ISSET(fd, &readfds));
+ ATF_REQUIRE(now.tv_sec - then.tv_sec >= 1);
+
+ /* kevent(2) */
+ memset(kev, 0, sizeof(kev));
+ ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &then) == 0);
+ ATF_REQUIRE(timerfd_settime(fd, 0, &its, NULL) == 0);
+ ATF_REQUIRE(kevent(kq, NULL, 0, kev, 1, NULL) == 1);
+ ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+ ATF_REQUIRE(kev[0].ident == (uintptr_t)fd);
+ ATF_REQUIRE(kev[0].filter == EVFILT_READ);
+ ATF_REQUIRE((kev[0].flags & (EV_EOF | EV_ERROR)) == 0);
+ ATF_REQUIRE(kev[0].data == 1);
+ /* As for poll(2) and select(2): the call must really have waited. */
+ ATF_REQUIRE(now.tv_sec - then.tv_sec >= 1);
+
+ (void) close(kq);
+ (void) close(fd);
+}
+
+/*****************************************************************************/
+
+/*
+ * Helper thread for timerfd_restart.
+ *
+ * 'v' is the shared struct helper_context. After meeting the main
+ * thread at the barrier, the helper sleeps 5 seconds and then closes
+ * ctx->fd, the descriptor the main thread is expected to be blocked
+ * on. Always returns NULL.
+ */
+static void *
+timerfd_restart_helper(void * const v)
+{
+ struct helper_context * const ctx = v;
+
+ ATF_REQUIRE(wait_barrier(ctx));
+
+ /*
+ * Wait 5 seconds (that should give the main thread time to
+ * block), and then close the descriptor.
+ */
+ ATF_REQUIRE(sleep(5) == 0);
+ ATF_REQUIRE(close(ctx->fd) == 0);
+
+ return NULL;
+}
+
+/*
+ * timerfd_restart: closing a timerfd from another thread while a read
+ * on it is blocked.
+ *
+ * The main thread arms a one-hour timer on a blocking CLOCK_MONOTONIC
+ * timerfd, starts the helper, meets it at the barrier and calls read.
+ * The helper closes the descriptor 5 seconds after the barrier. The
+ * read must fail with EBADF, and at least 5 whole seconds must have
+ * elapsed since just before the helper was created. The descriptor is
+ * closed by the helper, not by this function.
+ */
+ATF_TC(timerfd_restart);
+ATF_TC_HEAD(timerfd_restart, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "exercises the 'restart' fileop code path");
+}
+ATF_TC_BODY(timerfd_restart, tc)
+{
+ struct timespec then, now, delta;
+ struct helper_context ctx;
+ uint64_t val;
+ pthread_t helper;
+ void *join_val;
+
+ /* Zeroes ctx, so ctx.fd must be assigned after this call. */
+ init_helper_context(&ctx);
+
+ ATF_REQUIRE((ctx.fd = timerfd_create(CLOCK_MONOTONIC, 0)) >= 0);
+
+ /* 60 * 60 seconds: far enough out that it cannot fire on its own. */
+ const struct itimerspec its = {
+ .it_value = { .tv_sec = 60 * 60, .tv_nsec = 0 },
+ .it_interval = { .tv_sec = 0, .tv_nsec = 0 },
+ };
+ ATF_REQUIRE(timerfd_settime(ctx.fd, 0, &its, NULL) == 0);
+
+
+ ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &then) == 0);
+ ATF_REQUIRE(pthread_create(&helper, NULL,
+ timerfd_restart_helper, &ctx) == 0);
+
+ /*
+ * Wait for the helper to be ready, and then immediately block
+ * in read(). The helper will close the file, and we should get
+ * EBADF after a few seconds.
+ */
+ ATF_REQUIRE(wait_barrier(&ctx));
+ ATF_REQUIRE_ERRNO(EBADF, timerfd_read(ctx.fd, &val) == -1);
+ ATF_REQUIRE(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
+
+ /* delta = now - then; must cover the helper's 5-second sleep. */
+ timespecsub(&now, &then, &delta);
+ ATF_REQUIRE(delta.tv_sec >= 5);
+
+ /* Reap the helper. */
+ ATF_REQUIRE(pthread_join(helper, &join_val) == 0);
+}
+
+/*****************************************************************************/
+
+/*
+ * Test program entry point: adds each timerfd test case to the test
+ * program 'tp' and returns atf_no_error(). The two cancel_on_set
+ * cases are the ones whose heads set require.user to root.
+ */
+ATF_TP_ADD_TCS(tp)
+{
+ ATF_TP_ADD_TC(tp, timerfd_create);
+ ATF_TP_ADD_TC(tp, timerfd_bogusfd);
+ ATF_TP_ADD_TC(tp, timerfd_block);
+ ATF_TP_ADD_TC(tp, timerfd_repeating);
+ ATF_TP_ADD_TC(tp, timerfd_abstime);
+ ATF_TP_ADD_TC(tp, timerfd_cancel_on_set_block);
+ ATF_TP_ADD_TC(tp, timerfd_cancel_on_set_immed);
+ ATF_TP_ADD_TC(tp, timerfd_select_poll_kevent_immed);
+ ATF_TP_ADD_TC(tp, timerfd_select_poll_kevent_block);
+ ATF_TP_ADD_TC(tp, timerfd_restart);
+
+ return atf_no_error();
+}