On 22/10/21(Fri) 13:11, Visa Hankala wrote:
> Here is another attempt to set klist lock for sockets. This is a revised
> version of a patch that I posted in January [1].
>
> Using solock() for the klists is probably the easiest way at the time
> being. However, the lock is a potential point of contention because of
> the underlying big-lock design. The increase of overhead is related to
> adding and removing event registrations. With persistent registrations
> the overhead is unchanged.
>
> As a result, socket and named FIFO event filters should be ready to run
> without the kernel lock. The f_event, f_modify and f_process callbacks
> should be MP-safe already.
>
> [1] https://marc.info/?l=openbsd-tech&m=160986578724696
>
> OK?
I've been running with this and unlocked sowakeup() for quite some time
now.
ok mpi@
> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: src/sys/kern/uipc_socket.c,v
> retrieving revision 1.265
> diff -u -p -r1.265 uipc_socket.c
> --- kern/uipc_socket.c 14 Oct 2021 23:05:10 -0000 1.265
> +++ kern/uipc_socket.c 22 Oct 2021 12:17:57 -0000
> @@ -84,7 +84,7 @@ int filt_solistenprocess(struct knote *k
> int filt_solisten_common(struct knote *kn, struct socket *so);
>
> const struct filterops solisten_filtops = {
> - .f_flags = FILTEROP_ISFD,
> + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
> .f_attach = NULL,
> .f_detach = filt_sordetach,
> .f_event = filt_solisten,
> @@ -93,7 +93,7 @@ const struct filterops solisten_filtops
> };
>
> const struct filterops soread_filtops = {
> - .f_flags = FILTEROP_ISFD,
> + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
> .f_attach = NULL,
> .f_detach = filt_sordetach,
> .f_event = filt_soread,
> @@ -102,7 +102,7 @@ const struct filterops soread_filtops =
> };
>
> const struct filterops sowrite_filtops = {
> - .f_flags = FILTEROP_ISFD,
> + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
> .f_attach = NULL,
> .f_detach = filt_sowdetach,
> .f_event = filt_sowrite,
> @@ -111,7 +111,7 @@ const struct filterops sowrite_filtops =
> };
>
> const struct filterops soexcept_filtops = {
> - .f_flags = FILTEROP_ISFD,
> + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
> .f_attach = NULL,
> .f_detach = filt_sordetach,
> .f_event = filt_soread,
> @@ -169,6 +169,8 @@ socreate(int dom, struct socket **aso, i
> return (EPROTOTYPE);
> so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
> rw_init(&so->so_lock, "solock");
> + klist_init(&so->so_rcv.sb_sel.si_note, &socket_klistops, so);
> + klist_init(&so->so_snd.sb_sel.si_note, &socket_klistops, so);
> sigio_init(&so->so_sigio);
> TAILQ_INIT(&so->so_q0);
> TAILQ_INIT(&so->so_q);
> @@ -258,6 +260,8 @@ sofree(struct socket *so, int s)
> }
> }
> sigio_free(&so->so_sigio);
> + klist_free(&so->so_rcv.sb_sel.si_note);
> + klist_free(&so->so_snd.sb_sel.si_note);
> #ifdef SOCKET_SPLICE
> if (so->so_sp) {
> if (issplicedback(so)) {
> @@ -2038,9 +2042,9 @@ soo_kqfilter(struct file *fp, struct kno
> {
> struct socket *so = kn->kn_fp->f_data;
> struct sockbuf *sb;
> + int s;
>
> - KERNEL_ASSERT_LOCKED();
> -
> + s = solock(so);
> switch (kn->kn_filter) {
> case EVFILT_READ:
> if (so->so_options & SO_ACCEPTCONN)
> @@ -2058,10 +2062,12 @@ soo_kqfilter(struct file *fp, struct kno
> sb = &so->so_rcv;
> break;
> default:
> + sounlock(so, s);
> return (EINVAL);
> }
>
> klist_insert_locked(&sb->sb_sel.si_note, kn);
> + sounlock(so, s);
>
> return (0);
> }
> @@ -2071,9 +2077,7 @@ filt_sordetach(struct knote *kn)
> {
> struct socket *so = kn->kn_fp->f_data;
>
> - KERNEL_ASSERT_LOCKED();
> -
> - klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn);
> + klist_remove(&so->so_rcv.sb_sel.si_note, kn);
> }
>
> int
> @@ -2159,9 +2163,7 @@ filt_sowdetach(struct knote *kn)
> {
> struct socket *so = kn->kn_fp->f_data;
>
> - KERNEL_ASSERT_LOCKED();
> -
> - klist_remove_locked(&so->so_snd.sb_sel.si_note, kn);
> + klist_remove(&so->so_snd.sb_sel.si_note, kn);
> }
>
> int
> @@ -2284,6 +2286,36 @@ filt_solistenprocess(struct knote *kn, s
> return (rv);
> }
>
> +void
> +klist_soassertlk(void *arg)
> +{
> + struct socket *so = arg;
> +
> + soassertlocked(so);
> +}
> +
> +int
> +klist_solock(void *arg)
> +{
> + struct socket *so = arg;
> +
> + return (solock(so));
> +}
> +
> +void
> +klist_sounlock(void *arg, int ls)
> +{
> + struct socket *so = arg;
> +
> + sounlock(so, ls);
> +}
> +
> +const struct klistops socket_klistops = {
> + .klo_assertlk = klist_soassertlk,
> + .klo_lock = klist_solock,
> + .klo_unlock = klist_sounlock,
> +};
> +
> #ifdef DDB
> void
> sobuf_print(struct sockbuf *,
> Index: kern/uipc_socket2.c
> ===================================================================
> RCS file: src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.113
> diff -u -p -r1.113 uipc_socket2.c
> --- kern/uipc_socket2.c 26 Jul 2021 05:51:13 -0000 1.113
> +++ kern/uipc_socket2.c 22 Oct 2021 12:17:57 -0000
> @@ -189,6 +189,8 @@ sonewconn(struct socket *head, int conns
> so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
> so->so_rcv.sb_timeo_nsecs = head->so_rcv.sb_timeo_nsecs;
>
> + klist_init(&so->so_rcv.sb_sel.si_note, &socket_klistops, so);
> + klist_init(&so->so_snd.sb_sel.si_note, &socket_klistops, so);
> sigio_init(&so->so_sigio);
> sigio_copy(&so->so_sigio, &head->so_sigio);
>
> @@ -196,6 +198,8 @@ sonewconn(struct socket *head, int conns
> if ((*so->so_proto->pr_attach)(so, 0)) {
> (void) soqremque(so, soqueue);
> sigio_free(&so->so_sigio);
> + klist_free(&so->so_rcv.sb_sel.si_note);
> + klist_free(&so->so_snd.sb_sel.si_note);
> pool_put(&socket_pool, so);
> return (NULL);
> }
> Index: miscfs/fifofs/fifo_vnops.c
> ===================================================================
> RCS file: src/sys/miscfs/fifofs/fifo_vnops.c,v
> retrieving revision 1.82
> diff -u -p -r1.82 fifo_vnops.c
> --- miscfs/fifofs/fifo_vnops.c 15 Oct 2021 06:30:06 -0000 1.82
> +++ miscfs/fifofs/fifo_vnops.c 22 Oct 2021 12:17:57 -0000
> @@ -114,7 +114,7 @@ int filt_fifowriteprocess(struct knote *
> int filt_fifowrite_common(struct knote *kn, struct socket *so);
>
> const struct filterops fiforead_filtops = {
> - .f_flags = FILTEROP_ISFD,
> + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
> .f_attach = NULL,
> .f_detach = filt_fifordetach,
> .f_event = filt_fiforead,
> @@ -123,7 +123,7 @@ const struct filterops fiforead_filtops
> };
>
> const struct filterops fifowrite_filtops = {
> - .f_flags = FILTEROP_ISFD,
> + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
> .f_attach = NULL,
> .f_detach = filt_fifowdetach,
> .f_event = filt_fifowrite,
> @@ -528,7 +528,7 @@ fifo_kqfilter(void *v)
>
> ap->a_kn->kn_hook = so;
>
> - klist_insert_locked(&sb->sb_sel.si_note, ap->a_kn);
> + klist_insert(&sb->sb_sel.si_note, ap->a_kn);
>
> return (0);
> }
> @@ -538,7 +538,7 @@ filt_fifordetach(struct knote *kn)
> {
> struct socket *so = (struct socket *)kn->kn_hook;
>
> - klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn);
> + klist_remove(&so->so_rcv.sb_sel.si_note, kn);
> }
>
> int
> @@ -609,7 +609,7 @@ filt_fifowdetach(struct knote *kn)
> {
> struct socket *so = (struct socket *)kn->kn_hook;
>
> - klist_remove_locked(&so->so_snd.sb_sel.si_note, kn);
> + klist_remove(&so->so_snd.sb_sel.si_note, kn);
> }
>
> int
> Index: sys/event.h
> ===================================================================
> RCS file: src/sys/sys/event.h,v
> retrieving revision 1.56
> diff -u -p -r1.56 event.h
> --- sys/event.h 16 Jun 2021 14:26:30 -0000 1.56
> +++ sys/event.h 22 Oct 2021 12:17:57 -0000
> @@ -285,6 +285,7 @@ struct timespec;
>
> extern const struct filterops sig_filtops;
> extern const struct filterops dead_filtops;
> +extern const struct klistops socket_klistops;
>
> extern void kqpoll_init(void);
> extern void kqpoll_exit(void);
>