On 22/10/21(Fri) 13:11, Visa Hankala wrote:
> Here is another attempt to set klist lock for sockets. This is a revised
> version of a patch that I posted in January [1].
> 
> Using solock() for the klists is probably the easiest way at the time
> being. However, the lock is a potential point of contention because of
> the underlying big-lock design. The increase of overhead is related to
> adding and removing event registrations. With persistent registrations
> the overhead is unchanged.
> 
> As a result, socket and named FIFO event filters should be ready to run
> without the kernel lock. The f_event, f_modify and f_process callbacks
> should be MP-safe already.
> 
> [1] https://marc.info/?l=openbsd-tech&m=160986578724696
> 
> OK?

I've been running with this and unlocked sowakeup() for quite some time
now.

ok mpi@

> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: src/sys/kern/uipc_socket.c,v
> retrieving revision 1.265
> diff -u -p -r1.265 uipc_socket.c
> --- kern/uipc_socket.c        14 Oct 2021 23:05:10 -0000      1.265
> +++ kern/uipc_socket.c        22 Oct 2021 12:17:57 -0000
> @@ -84,7 +84,7 @@ int filt_solistenprocess(struct knote *k
>  int  filt_solisten_common(struct knote *kn, struct socket *so);
>  
>  const struct filterops solisten_filtops = {
> -     .f_flags        = FILTEROP_ISFD,
> +     .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
>       .f_attach       = NULL,
>       .f_detach       = filt_sordetach,
>       .f_event        = filt_solisten,
> @@ -93,7 +93,7 @@ const struct filterops solisten_filtops 
>  };
>  
>  const struct filterops soread_filtops = {
> -     .f_flags        = FILTEROP_ISFD,
> +     .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
>       .f_attach       = NULL,
>       .f_detach       = filt_sordetach,
>       .f_event        = filt_soread,
> @@ -102,7 +102,7 @@ const struct filterops soread_filtops = 
>  };
>  
>  const struct filterops sowrite_filtops = {
> -     .f_flags        = FILTEROP_ISFD,
> +     .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
>       .f_attach       = NULL,
>       .f_detach       = filt_sowdetach,
>       .f_event        = filt_sowrite,
> @@ -111,7 +111,7 @@ const struct filterops sowrite_filtops =
>  };
>  
>  const struct filterops soexcept_filtops = {
> -     .f_flags        = FILTEROP_ISFD,
> +     .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
>       .f_attach       = NULL,
>       .f_detach       = filt_sordetach,
>       .f_event        = filt_soread,
> @@ -169,6 +169,8 @@ socreate(int dom, struct socket **aso, i
>               return (EPROTOTYPE);
>       so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
>       rw_init(&so->so_lock, "solock");
> +     klist_init(&so->so_rcv.sb_sel.si_note, &socket_klistops, so);
> +     klist_init(&so->so_snd.sb_sel.si_note, &socket_klistops, so);
>       sigio_init(&so->so_sigio);
>       TAILQ_INIT(&so->so_q0);
>       TAILQ_INIT(&so->so_q);
> @@ -258,6 +260,8 @@ sofree(struct socket *so, int s)
>               }
>       }
>       sigio_free(&so->so_sigio);
> +     klist_free(&so->so_rcv.sb_sel.si_note);
> +     klist_free(&so->so_snd.sb_sel.si_note);
>  #ifdef SOCKET_SPLICE
>       if (so->so_sp) {
>               if (issplicedback(so)) {
> @@ -2038,9 +2042,9 @@ soo_kqfilter(struct file *fp, struct kno
>  {
>       struct socket *so = kn->kn_fp->f_data;
>       struct sockbuf *sb;
> +     int s;
>  
> -     KERNEL_ASSERT_LOCKED();
> -
> +     s = solock(so);
>       switch (kn->kn_filter) {
>       case EVFILT_READ:
>               if (so->so_options & SO_ACCEPTCONN)
> @@ -2058,10 +2062,12 @@ soo_kqfilter(struct file *fp, struct kno
>               sb = &so->so_rcv;
>               break;
>       default:
> +             sounlock(so, s);
>               return (EINVAL);
>       }
>  
>       klist_insert_locked(&sb->sb_sel.si_note, kn);
> +     sounlock(so, s);
>  
>       return (0);
>  }
> @@ -2071,9 +2077,7 @@ filt_sordetach(struct knote *kn)
>  {
>       struct socket *so = kn->kn_fp->f_data;
>  
> -     KERNEL_ASSERT_LOCKED();
> -
> -     klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn);
> +     klist_remove(&so->so_rcv.sb_sel.si_note, kn);
>  }
>  
>  int
> @@ -2159,9 +2163,7 @@ filt_sowdetach(struct knote *kn)
>  {
>       struct socket *so = kn->kn_fp->f_data;
>  
> -     KERNEL_ASSERT_LOCKED();
> -
> -     klist_remove_locked(&so->so_snd.sb_sel.si_note, kn);
> +     klist_remove(&so->so_snd.sb_sel.si_note, kn);
>  }
>  
>  int
> @@ -2284,6 +2286,36 @@ filt_solistenprocess(struct knote *kn, s
>       return (rv);
>  }
>  
> +void
> +klist_soassertlk(void *arg)
> +{
> +     struct socket *so = arg;
> +
> +     soassertlocked(so);
> +}
> +
> +int
> +klist_solock(void *arg)
> +{
> +     struct socket *so = arg;
> +
> +     return (solock(so));
> +}
> +
> +void
> +klist_sounlock(void *arg, int ls)
> +{
> +     struct socket *so = arg;
> +
> +     sounlock(so, ls);
> +}
> +
> +const struct klistops socket_klistops = {
> +     .klo_assertlk   = klist_soassertlk,
> +     .klo_lock       = klist_solock,
> +     .klo_unlock     = klist_sounlock,
> +};
> +
>  #ifdef DDB
>  void
>  sobuf_print(struct sockbuf *,
> Index: kern/uipc_socket2.c
> ===================================================================
> RCS file: src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.113
> diff -u -p -r1.113 uipc_socket2.c
> --- kern/uipc_socket2.c       26 Jul 2021 05:51:13 -0000      1.113
> +++ kern/uipc_socket2.c       22 Oct 2021 12:17:57 -0000
> @@ -189,6 +189,8 @@ sonewconn(struct socket *head, int conns
>       so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
>       so->so_rcv.sb_timeo_nsecs = head->so_rcv.sb_timeo_nsecs;
>  
> +     klist_init(&so->so_rcv.sb_sel.si_note, &socket_klistops, so);
> +     klist_init(&so->so_snd.sb_sel.si_note, &socket_klistops, so);
>       sigio_init(&so->so_sigio);
>       sigio_copy(&so->so_sigio, &head->so_sigio);
>  
> @@ -196,6 +198,8 @@ sonewconn(struct socket *head, int conns
>       if ((*so->so_proto->pr_attach)(so, 0)) {
>               (void) soqremque(so, soqueue);
>               sigio_free(&so->so_sigio);
> +             klist_free(&so->so_rcv.sb_sel.si_note);
> +             klist_free(&so->so_snd.sb_sel.si_note);
>               pool_put(&socket_pool, so);
>               return (NULL);
>       }
> Index: miscfs/fifofs/fifo_vnops.c
> ===================================================================
> RCS file: src/sys/miscfs/fifofs/fifo_vnops.c,v
> retrieving revision 1.82
> diff -u -p -r1.82 fifo_vnops.c
> --- miscfs/fifofs/fifo_vnops.c        15 Oct 2021 06:30:06 -0000      1.82
> +++ miscfs/fifofs/fifo_vnops.c        22 Oct 2021 12:17:57 -0000
> @@ -114,7 +114,7 @@ int       filt_fifowriteprocess(struct knote *
>  int  filt_fifowrite_common(struct knote *kn, struct socket *so);
>  
>  const struct filterops fiforead_filtops = {
> -     .f_flags        = FILTEROP_ISFD,
> +     .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
>       .f_attach       = NULL,
>       .f_detach       = filt_fifordetach,
>       .f_event        = filt_fiforead,
> @@ -123,7 +123,7 @@ const struct filterops fiforead_filtops 
>  };
>  
>  const struct filterops fifowrite_filtops = {
> -     .f_flags        = FILTEROP_ISFD,
> +     .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
>       .f_attach       = NULL,
>       .f_detach       = filt_fifowdetach,
>       .f_event        = filt_fifowrite,
> @@ -528,7 +528,7 @@ fifo_kqfilter(void *v)
>  
>       ap->a_kn->kn_hook = so;
>  
> -     klist_insert_locked(&sb->sb_sel.si_note, ap->a_kn);
> +     klist_insert(&sb->sb_sel.si_note, ap->a_kn);
>  
>       return (0);
>  }
> @@ -538,7 +538,7 @@ filt_fifordetach(struct knote *kn)
>  {
>       struct socket *so = (struct socket *)kn->kn_hook;
>  
> -     klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn);
> +     klist_remove(&so->so_rcv.sb_sel.si_note, kn);
>  }
>  
>  int
> @@ -609,7 +609,7 @@ filt_fifowdetach(struct knote *kn)
>  {
>       struct socket *so = (struct socket *)kn->kn_hook;
>  
> -     klist_remove_locked(&so->so_snd.sb_sel.si_note, kn);
> +     klist_remove(&so->so_snd.sb_sel.si_note, kn);
>  }
>  
>  int
> Index: sys/event.h
> ===================================================================
> RCS file: src/sys/sys/event.h,v
> retrieving revision 1.56
> diff -u -p -r1.56 event.h
> --- sys/event.h       16 Jun 2021 14:26:30 -0000      1.56
> +++ sys/event.h       22 Oct 2021 12:17:57 -0000
> @@ -285,6 +285,7 @@ struct timespec;
>  
>  extern const struct filterops sig_filtops;
>  extern const struct filterops dead_filtops;
> +extern const struct klistops socket_klistops;
>  
>  extern void  kqpoll_init(void);
>  extern void  kqpoll_exit(void);
> 

Reply via email to