Hi,

I did some performance measurements with and without your diff on
OpenBSD-current.

There is no performance difference. I think this is the expected outcome.

BR
Simon

2016-11-04 10:12 GMT+01:00, Martin Pieuchot <[email protected]>:
> On 03/11/16(Thu) 11:21, Martin Pieuchot wrote:
>> Here's the next iteration of my diff introducing a rwlock to serialize
>> the network input path with socket paths.  Changes are:
>>
>>   - more timeout_set_proc() that should fix problems reported by
>>     Chris Jackman.
>>
>>   - I introduced a set of macros to make it easier to audit existing
>>     splsoftnet() calls.
>>
>>   - It makes use of splassert_fail() if the lock is not held.
>>
>>
>> My plan is to commit it, assuming it is stable enough, then fix the
>> remaining issues in tree.  This includes:
>>
>>   - Analyze and, if needed, fix the two code paths where we do an
>>     unlock/lock dance
>>
>>   - Remove unneeded/recursive splsoftnet() dances.
>>
>> Once that's done we should be able to remove the KERNEL_LOCK() from the
>> input path.
>>
>> So please test and report back.
>
> Updated version that prevents a recursion in doaccept(), reported by Nils
> Frohberg.
>
> diff --git sys/kern/sys_socket.c sys/kern/sys_socket.c
> index 7a90f78..a7be8a1 100644
> --- sys/kern/sys_socket.c
> +++ sys/kern/sys_socket.c
> @@ -133,7 +133,7 @@ soo_poll(struct file *fp, int events, struct proc *p)
>       int revents = 0;
>       int s;
>
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       if (events & (POLLIN | POLLRDNORM)) {
>               if (soreadable(so))
>                       revents |= events & (POLLIN | POLLRDNORM);
> @@ -159,7 +159,7 @@ soo_poll(struct file *fp, int events, struct proc *p)
>                       so->so_snd.sb_flagsintr |= SB_SEL;
>               }
>       }
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>       return (revents);
>  }
>
> diff --git sys/kern/uipc_socket.c sys/kern/uipc_socket.c
> index 9e8d05f..dd067b3 100644
> --- sys/kern/uipc_socket.c
> +++ sys/kern/uipc_socket.c
> @@ -89,6 +89,11 @@ struct pool sosplice_pool;
>  struct taskq *sosplice_taskq;
>  #endif
>
> +/*
> + * Serialize socket operations.
> + */
> +struct rwlock socketlock = RWLOCK_INITIALIZER("socketlock");
> +
>  void
>  soinit(void)
>  {
> @@ -123,7 +128,7 @@ socreate(int dom, struct socket **aso, int type, int
> proto)
>               return (EPROTONOSUPPORT);
>       if (prp->pr_type != type)
>               return (EPROTOTYPE);
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
>       TAILQ_INIT(&so->so_q0);
>       TAILQ_INIT(&so->so_q);
> @@ -141,10 +146,10 @@ socreate(int dom, struct socket **aso, int type, int
> proto)
>       if (error) {
>               so->so_state |= SS_NOFDREF;
>               sofree(so);
> -             splx(s);
> +             SOCKET_UNLOCK(s);
>               return (error);
>       }
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>       *aso = so;
>       return (0);
>  }
> @@ -154,9 +159,9 @@ sobind(struct socket *so, struct mbuf *nam, struct proc
> *p)
>  {
>       int s, error;
>
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>       return (error);
>  }
>
> @@ -171,11 +176,11 @@ solisten(struct socket *so, int backlog)
>       if (isspliced(so) || issplicedback(so))
>               return (EOPNOTSUPP);
>  #endif /* SOCKET_SPLICE */
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
>           curproc);
>       if (error) {
> -             splx(s);
> +             SOCKET_UNLOCK(s);
>               return (error);
>       }
>       if (TAILQ_FIRST(&so->so_q) == NULL)
> @@ -185,14 +190,14 @@ solisten(struct socket *so, int backlog)
>       if (backlog < sominconn)
>               backlog = sominconn;
>       so->so_qlimit = backlog;
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>       return (0);
>  }
>
>  void
>  sofree(struct socket *so)
>  {
> -     splsoftassert(IPL_SOFTNET);
> +     SOCKET_ASSERT_LOCKED();
>
>       if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
>               return;
> @@ -232,7 +237,7 @@ soclose(struct socket *so)
>       struct socket *so2;
>       int s, error = 0;
>
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       if (so->so_options & SO_ACCEPTCONN) {
>               while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
>                       (void) soqremque(so2, 0);
> @@ -256,7 +261,7 @@ soclose(struct socket *so)
>                           (so->so_state & SS_NBIO))
>                               goto drop;
>                       while (so->so_state & SS_ISCONNECTED) {
> -                             error = tsleep(&so->so_timeo,
> +                             error = rwsleep(&so->so_timeo, &socketlock,
>                                   PSOCK | PCATCH, "netcls",
>                                   so->so_linger * hz);
>                               if (error)
> @@ -276,14 +281,14 @@ discard:
>               panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
>       so->so_state |= SS_NOFDREF;
>       sofree(so);
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>       return (error);
>  }
>
>  int
>  soabort(struct socket *so)
>  {
> -     splsoftassert(IPL_SOFTNET);
> +     SOCKET_ASSERT_LOCKED();
>
>       return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL,
>          curproc);
> @@ -294,7 +299,7 @@ soaccept(struct socket *so, struct mbuf *nam)
>  {
>       int error = 0;
>
> -     splsoftassert(IPL_SOFTNET);
> +     SOCKET_ASSERT_LOCKED();
>
>       if ((so->so_state & SS_NOFDREF) == 0)
>               panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
> @@ -315,7 +320,7 @@ soconnect(struct socket *so, struct mbuf *nam)
>
>       if (so->so_options & SO_ACCEPTCONN)
>               return (EOPNOTSUPP);
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       /*
>        * If protocol is connection-based, can only connect once.
>        * Otherwise, if connected, try to disconnect first.
> @@ -329,7 +334,7 @@ soconnect(struct socket *so, struct mbuf *nam)
>       else
>               error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
>                   NULL, nam, NULL, curproc);
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>       return (error);
>  }
>
> @@ -338,10 +343,10 @@ soconnect2(struct socket *so1, struct socket *so2)
>  {
>       int s, error;
>
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
>           (struct mbuf *)so2, NULL, curproc);
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>       return (error);
>  }
>
> @@ -350,7 +355,7 @@ sodisconnect(struct socket *so)
>  {
>       int error;
>
> -     splsoftassert(IPL_SOFTNET);
> +     SOCKET_ASSERT_LOCKED();
>
>       if ((so->so_state & SS_ISCONNECTED) == 0)
>               return (ENOTCONN);
> @@ -418,21 +423,20 @@ sosend(struct socket *so, struct mbuf *addr, struct
> uio *uio, struct mbuf *top,
>                           (sizeof(struct file *) / sizeof(int)));
>       }
>
> -#define      snderr(errno)   { error = errno; splx(s); goto release; }
> +#define      snderr(errno)   { error = errno; SOCKET_UNLOCK(s); goto 
> release; }
>
>  restart:
>       if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
>               goto out;
>       so->so_state |= SS_ISSENDING;
>       do {
> -             s = splsoftnet();
> +             SOCKET_LOCK(s);
>               if (so->so_state & SS_CANTSENDMORE)
>                       snderr(EPIPE);
>               if (so->so_error) {
>                       error = so->so_error;
>                       so->so_error = 0;
> -                     splx(s);
> -                     goto release;
> +                     snderr(error);
>               }
>               if ((so->so_state & SS_ISCONNECTED) == 0) {
>                       if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
> @@ -456,12 +460,12 @@ restart:
>                       sbunlock(&so->so_snd);
>                       error = sbwait(&so->so_snd);
>                       so->so_state &= ~SS_ISSENDING;
> -                     splx(s);
> +                     SOCKET_UNLOCK(s);
>                       if (error)
>                               goto out;
>                       goto restart;
>               }
> -             splx(s);
> +             SOCKET_UNLOCK(s);
>               space -= clen;
>               do {
>                       if (uio == NULL) {
> @@ -481,13 +485,13 @@ restart:
>                               if (flags & MSG_EOR)
>                                       top->m_flags |= M_EOR;
>                       }
> -                     s = splsoftnet();               /* XXX */
> +                     SOCKET_LOCK(s);
>                       if (resid == 0)
>                               so->so_state &= ~SS_ISSENDING;
>                       error = (*so->so_proto->pr_usrreq)(so,
>                           (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
>                           top, addr, control, curproc);
> -                     splx(s);
> +                     SOCKET_UNLOCK(s);
>                       clen = 0;
>                       control = NULL;
>                       top = NULL;
> @@ -617,8 +621,8 @@ sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
>   * must begin with an address if the protocol so specifies,
>   * followed by an optional mbuf or mbufs containing ancillary data,
>   * and then zero or more mbufs of data.
> - * In order to avoid blocking network interrupts for the entire time here,
> - * we splx() while doing the actual copy to user space.
> + * In order to avoid blocking network for the entire time here, we splx()
> + * and release ``socketlock'' while doing the actual copy to user space.
>   * Although the sockbuf is locked, new data may still be appended,
>   * and thus we must maintain consistency of the sockbuf during that time.
>   *
> @@ -672,7 +676,7 @@ bad:
>  restart:
>       if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
>               return (error);
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>
>       m = so->so_rcv.sb_mb;
>  #ifdef SOCKET_SPLICE
> @@ -737,7 +741,7 @@ restart:
>               SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
>               sbunlock(&so->so_rcv);
>               error = sbwait(&so->so_rcv);
> -             splx(s);
> +             SOCKET_UNLOCK(s);
>               if (error)
>                       return (error);
>               goto restart;
> @@ -871,9 +875,9 @@ dontblock:
>                       SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
>                       SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
>                       resid = uio->uio_resid;
> -                     splx(s);
> +                     SOCKET_UNLOCK(s);
>                       uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
> -                     s = splsoftnet();
> +                     SOCKET_LOCK(s);
>                       if (uio_error)
>                               uio->uio_resid = resid - len;
>               } else
> @@ -955,7 +959,7 @@ dontblock:
>                       error = sbwait(&so->so_rcv);
>                       if (error) {
>                               sbunlock(&so->so_rcv);
> -                             splx(s);
> +                             SOCKET_UNLOCK(s);
>                               return (0);
>                       }
>                       if ((m = so->so_rcv.sb_mb) != NULL)
> @@ -991,7 +995,7 @@ dontblock:
>       if (orig_resid == uio->uio_resid && orig_resid &&
>           (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
>               sbunlock(&so->so_rcv);
> -             splx(s);
> +             SOCKET_UNLOCK(s);
>               goto restart;
>       }
>
> @@ -1002,7 +1006,7 @@ dontblock:
>               *flagsp |= flags;
>  release:
>       sbunlock(&so->so_rcv);
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>       return (error);
>  }
>
> @@ -1012,7 +1016,7 @@ soshutdown(struct socket *so, int how)
>       struct protosw *pr = so->so_proto;
>       int s, error = 0;
>
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       switch (how) {
>       case SHUT_RD:
>       case SHUT_RDWR:
> @@ -1028,7 +1032,8 @@ soshutdown(struct socket *so, int how)
>               error = EINVAL;
>               break;
>       }
> -     splx(s);
> +     SOCKET_UNLOCK(s);
> +
>       return (error);
>  }
>
> @@ -1042,6 +1047,7 @@ sorflush(struct socket *so)
>
>       sb->sb_flags |= SB_NOINTR;
>       (void) sblock(sb, M_WAITOK);
> +     /* XXXSMP */
>       s = splnet();
>       socantrcvmore(so);
>       sbunlock(sb);
> @@ -1095,10 +1101,10 @@ sosplice(struct socket *so, int fd, off_t max,
> struct timeval *tv)
>               if ((error = sblock(&so->so_rcv,
>                   (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0)
>                       return (error);
> -             s = splsoftnet();
> +             SOCKET_LOCK(s);
>               if (so->so_sp->ssp_socket)
>                       sounsplice(so, so->so_sp->ssp_socket, 1);
> -             splx(s);
> +             SOCKET_UNLOCK(s);
>               sbunlock(&so->so_rcv);
>               return (0);
>       }
> @@ -1127,7 +1133,7 @@ sosplice(struct socket *so, int fd, off_t max, struct
> timeval *tv)
>               FRELE(fp, curproc);
>               return (error);
>       }
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>
>       if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
>               error = EBUSY;
> @@ -1168,7 +1174,7 @@ sosplice(struct socket *so, int fd, off_t max, struct
> timeval *tv)
>       }
>
>   release:
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>       sbunlock(&sosp->so_snd);
>       sbunlock(&so->so_rcv);
>       FRELE(fp, curproc);
> @@ -1178,7 +1184,7 @@ sosplice(struct socket *so, int fd, off_t max, struct
> timeval *tv)
>  void
>  sounsplice(struct socket *so, struct socket *sosp, int wakeup)
>  {
> -     splsoftassert(IPL_SOFTNET);
> +     SOCKET_ASSERT_LOCKED();
>
>       task_del(sosplice_taskq, &so->so_splicetask);
>       timeout_del(&so->so_idleto);
> @@ -1195,12 +1201,12 @@ soidle(void *arg)
>       struct socket *so = arg;
>       int s;
>
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
>               so->so_error = ETIMEDOUT;
>               sounsplice(so, so->so_sp->ssp_socket, 1);
>       }
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>  }
>
>  void
> @@ -1209,7 +1215,7 @@ sotask(void *arg)
>       struct socket *so = arg;
>       int s;
>
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
>               /*
>                * We may not sleep here as sofree() and unsplice() may be
> @@ -1218,7 +1224,7 @@ sotask(void *arg)
>                */
>               somove(so, M_DONTWAIT);
>       }
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>
>       /* Avoid user land starvation. */
>       yield();
> @@ -1240,7 +1246,7 @@ somove(struct socket *so, int wait)
>       int              error = 0, maxreached = 0;
>       short            state;
>
> -     splsoftassert(IPL_SOFTNET);
> +     SOCKET_ASSERT_LOCKED();
>
>   nextpkt:
>       if (so->so_error) {
> @@ -1502,7 +1508,7 @@ somove(struct socket *so, int wait)
>  void
>  sorwakeup(struct socket *so)
>  {
> -     splsoftassert(IPL_SOFTNET);
> +     SOCKET_ASSERT_LOCKED();
>
>  #ifdef SOCKET_SPLICE
>       if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
> @@ -1523,14 +1529,18 @@ sorwakeup(struct socket *so)
>               return;
>  #endif
>       sowakeup(so, &so->so_rcv);
> -     if (so->so_upcall)
> +     if (so->so_upcall) {
> +             /* XXXSMP breaks atomicity */
> +             rw_exit_write(&socketlock);
>               (*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
> +             rw_enter_write(&socketlock);
> +     }
>  }
>
>  void
>  sowwakeup(struct socket *so)
>  {
> -     splsoftassert(IPL_SOFTNET);
> +     SOCKET_ASSERT_LOCKED();
>
>  #ifdef SOCKET_SPLICE
>       if (so->so_snd.sb_flagsintr & SB_SPLICE)
> @@ -1876,7 +1886,8 @@ soo_kqfilter(struct file *fp, struct knote *kn)
>  {
>       struct socket *so = kn->kn_fp->f_data;
>       struct sockbuf *sb;
> -     int s;
> +
> +     KERNEL_ASSERT_LOCKED();
>
>       switch (kn->kn_filter) {
>       case EVFILT_READ:
> @@ -1894,10 +1905,9 @@ soo_kqfilter(struct file *fp, struct knote *kn)
>               return (EINVAL);
>       }
>
> -     s = splnet();
>       SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
>       sb->sb_flags |= SB_KNOTE;
> -     splx(s);
> +
>       return (0);
>  }
>
> @@ -1905,12 +1915,12 @@ void
>  filt_sordetach(struct knote *kn)
>  {
>       struct socket *so = kn->kn_fp->f_data;
> -     int s = splnet();
> +
> +     KERNEL_ASSERT_LOCKED();
>
>       SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
>       if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
>               so->so_rcv.sb_flags &= ~SB_KNOTE;
> -     splx(s);
>  }
>
>  int
> @@ -1939,12 +1949,12 @@ void
>  filt_sowdetach(struct knote *kn)
>  {
>       struct socket *so = kn->kn_fp->f_data;
> -     int s = splnet();
> +
> +     KERNEL_ASSERT_LOCKED();
>
>       SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
>       if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
>               so->so_snd.sb_flags &= ~SB_KNOTE;
> -     splx(s);
>  }
>
>  int
> diff --git sys/kern/uipc_socket2.c sys/kern/uipc_socket2.c
> index c3b7c3a..ed9fa6f 100644
> --- sys/kern/uipc_socket2.c
> +++ sys/kern/uipc_socket2.c
> @@ -145,7 +145,7 @@ sonewconn(struct socket *head, int connstatus)
>       struct socket *so;
>       int soqueue = connstatus ? 1 : 0;
>
> -     splsoftassert(IPL_SOFTNET);
> +     SOCKET_ASSERT_LOCKED();
>
>       if (mclpools[0].pr_nout > mclpools[0].pr_hardlimit * 95 / 100)
>               return (NULL);
> @@ -274,10 +274,10 @@ socantrcvmore(struct socket *so)
>  int
>  sbwait(struct sockbuf *sb)
>  {
> -     splsoftassert(IPL_SOFTNET);
> +     SOCKET_ASSERT_LOCKED();
>
>       sb->sb_flagsintr |= SB_WAIT;
> -     return (tsleep(&sb->sb_cc,
> +     return (rwsleep(&sb->sb_cc, &socketlock,
>           (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "netio",
>           sb->sb_timeo));
>  }
> @@ -315,7 +315,7 @@ sbunlock(struct sockbuf *sb)
>  void
>  sowakeup(struct socket *so, struct sockbuf *sb)
>  {
> -     splsoftassert(IPL_SOFTNET);
> +     SOCKET_ASSERT_LOCKED();
>
>       selwakeup(&sb->sb_sel);
>       sb->sb_flagsintr &= ~SB_SEL;
> diff --git sys/kern/uipc_syscalls.c sys/kern/uipc_syscalls.c
> index e064bc9..fd54e70 100644
> --- sys/kern/uipc_syscalls.c
> +++ sys/kern/uipc_syscalls.c
> @@ -276,16 +276,11 @@ doaccept(struct proc *p, int sock, struct sockaddr
> *name, socklen_t *anamelen,
>       if ((error = getsock(p, sock, &fp)) != 0)
>               return (error);
>
> -     s = splsoftnet();
>       headfp = fp;
> -     head = fp->f_data;
> -
> -     if (isdnssocket((struct socket *)fp->f_data)) {
> -             error = EINVAL;
> -             goto bad;
> -     }
>  redo:
> -     if ((head->so_options & SO_ACCEPTCONN) == 0) {
> +     SOCKET_LOCK(s);
> +     head = headfp->f_data;
> +     if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) {
>               error = EINVAL;
>               goto bad;
>       }
> @@ -301,7 +296,8 @@ redo:
>                       head->so_error = ECONNABORTED;
>                       break;
>               }
> -             error = tsleep(&head->so_timeo, PSOCK | PCATCH, "netcon", 0);
> +             error = rwsleep(&head->so_timeo, &socketlock, PSOCK | PCATCH,
> +                 "netcon", 0);
>               if (error) {
>                       goto bad;
>               }
> @@ -311,7 +307,7 @@ redo:
>               head->so_error = 0;
>               goto bad;
>       }
> -     
> +
>       /* Figure out whether the new socket should be non-blocking. */
>       nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK)
>           : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);
> @@ -338,6 +334,7 @@ redo:
>        * or another thread or process to accept it.  If so, start over.
>        */
>       if (head->so_qlen == 0) {
> +             SOCKET_UNLOCK(s);
>               m_freem(nam);
>               fdplock(fdp);
>               fdremove(fdp, tmpfd);
> @@ -366,18 +363,23 @@ redo:
>
>       if (error) {
>               /* if an error occurred, free the file descriptor */
> +             SOCKET_UNLOCK(s);
> +             m_freem(nam);
>               fdplock(fdp);
>               fdremove(fdp, tmpfd);
>               closef(fp, p);
>               fdpunlock(fdp);
> +             goto out;
>       } else {
>               (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&nflag, p);
>               FILE_SET_MATURE(fp, p);
>               *retval = tmpfd;
> +             m_freem(nam);
>       }
> -     m_freem(nam);
> +
>  bad:
> -     splx(s);
> +     SOCKET_UNLOCK(s);
> +out:
>       FRELE(headfp, p);
>       return (error);
>  }
> @@ -434,9 +436,10 @@ sys_connect(struct proc *p, void *v, register_t
> *retval)
>               m_freem(nam);
>               return (EINPROGRESS);
>       }
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
> -             error = tsleep(&so->so_timeo, PSOCK | PCATCH, "netcon2", 0);
> +             error = rwsleep(&so->so_timeo, &socketlock, PSOCK | PCATCH,
> +                 "netcon2", 0);
>               if (error) {
>                       if (error == EINTR || error == ERESTART)
>                               interrupted = 1;
> @@ -447,7 +450,7 @@ sys_connect(struct proc *p, void *v, register_t
> *retval)
>               error = so->so_error;
>               so->so_error = 0;
>       }
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>  bad:
>       if (!interrupted)
>               so->so_state &= ~SS_ISCONNECTING;
> diff --git sys/kern/uipc_usrreq.c sys/kern/uipc_usrreq.c
> index e0f7f27..84bab2f 100644
> --- sys/kern/uipc_usrreq.c
> +++ sys/kern/uipc_usrreq.c
> @@ -131,7 +131,11 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m,
> struct mbuf *nam,
>               break;
>
>       case PRU_BIND:
> +             /* XXXSMP breaks atomicity */
> +             rw_assert_wrlock(&socketlock);
> +             rw_exit_write(&socketlock);
>               error = unp_bind(unp, nam, p);
> +             rw_enter_write(&socketlock);
>               break;
>
>       case PRU_LISTEN:
> diff --git sys/net/if.c sys/net/if.c
> index b7c9e11..797344f 100644
> --- sys/net/if.c
> +++ sys/net/if.c
> @@ -160,7 +160,8 @@ void      if_netisr(void *);
>  void ifa_print_all(void);
>  #endif
>
> -void if_start_locked(struct ifnet *ifp);
> +void if_start_locked(struct ifnet *);
> +int  if_ioctl_locked(struct socket *, u_long, caddr_t, struct proc *);
>
>  /*
>   * interface index map
> @@ -835,10 +836,15 @@ if_netisr(void *unused)
>       int s;
>
>       KERNEL_LOCK();
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>
>       while ((n = netisr) != 0) {
> -             sched_pause();
> +             /* Like sched_pause() but with a rwlock dance. */
> +             if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
> +                     SOCKET_UNLOCK(s);
> +                     yield();
> +                     SOCKET_LOCK(s);
> +             }
>
>               atomic_clearbits_int(&netisr, n);
>
> @@ -876,7 +882,7 @@ if_netisr(void *unused)
>               pfsyncintr();
>  #endif
>
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>       KERNEL_UNLOCK();
>  }
>
> @@ -1429,7 +1435,7 @@ if_downall(void)
>       struct ifnet *ifp;
>       int s;
>
> -     s = splnet();
> +     SOCKET_LOCK(s);
>       TAILQ_FOREACH(ifp, &ifnet, if_list) {
>               if ((ifp->if_flags & IFF_UP) == 0)
>                       continue;
> @@ -1442,7 +1448,7 @@ if_downall(void)
>                           (caddr_t)&ifrq);
>               }
>       }
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>  }
>
>  /*
> @@ -1502,9 +1508,9 @@ if_linkstate_task(void *xifidx)
>       if (ifp == NULL)
>               return;
>
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       if_linkstate(ifp);
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>
>       if_put(ifp);
>  }
> @@ -1512,7 +1518,7 @@ if_linkstate_task(void *xifidx)
>  void
>  if_linkstate(struct ifnet *ifp)
>  {
> -     splsoftassert(IPL_SOFTNET);
> +     SOCKET_ASSERT_LOCKED();
>
>       rt_ifmsg(ifp);
>  #ifndef SMALL_KERNEL
> @@ -1703,6 +1709,18 @@ if_setrdomain(struct ifnet *ifp, int rdomain)
>  int
>  ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
>  {
> +     int s, error;
> +
> +     SOCKET_LOCK(s);
> +     error = if_ioctl_locked(so, cmd, data, p);
> +     SOCKET_UNLOCK(s);
> +
> +     return (error);
> +}
> +
> +int
> +if_ioctl_locked(struct socket *so, u_long cmd, caddr_t data, struct proc
> *p)
> +{
>       struct ifnet *ifp;
>       struct ifreq *ifr;
>       struct sockaddr_dl *sdl;
> @@ -1751,20 +1769,15 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data,
> struct proc *p)
>               switch (ifar->ifar_af) {
>               case AF_INET:
>                       /* attach is a noop for AF_INET */
> -                     if (cmd == SIOCIFAFDETACH) {
> -                             s = splsoftnet();
> +                     if (cmd == SIOCIFAFDETACH)
>                               in_ifdetach(ifp);
> -                             splx(s);
> -                     }
>                       return (0);
>  #ifdef INET6
>               case AF_INET6:
> -                     s = splsoftnet();
>                       if (cmd == SIOCIFAFATTACH)
>                               error = in6_ifattach(ifp);
>                       else
>                               in6_ifdetach(ifp);
> -                     splx(s);
>                       return (error);
>  #endif /* INET6 */
>               default:
> diff --git sys/net/route.c sys/net/route.c
> index a04b095..ab2b924 100644
> --- sys/net/route.c
> +++ sys/net/route.c
> @@ -547,7 +547,7 @@ rtredirect(struct sockaddr *dst, struct sockaddr
> *gateway,
>       int                      flags = RTF_GATEWAY|RTF_HOST;
>       uint8_t                  prio = RTP_NONE;
>
> -     splsoftassert(IPL_SOFTNET);
> +     SOCKET_ASSERT_LOCKED();
>
>       /* verify the gateway is directly reachable */
>       if ((ifa = ifa_ifwithnet(gateway, rdomain)) == NULL) {
> @@ -1498,6 +1498,8 @@ rt_timer_queue_destroy(struct rttimer_queue *rtq)
>  {
>       struct rttimer  *r;
>
> +     SOCKET_ASSERT_LOCKED();
> +
>       while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
>               LIST_REMOVE(r, rtt_link);
>               TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
> @@ -1590,7 +1592,7 @@ rt_timer_timer(void *arg)
>
>       current_time = time_uptime;
>
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       for (rtq = LIST_FIRST(&rttimer_queue_head); rtq != NULL;
>            rtq = LIST_NEXT(rtq, rtq_link)) {
>               while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
> @@ -1605,7 +1607,7 @@ rt_timer_timer(void *arg)
>                               printf("rt_timer_timer: rtq_count reached 0\n");
>               }
>       }
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>
>       timeout_add_sec(to, 1);
>  }
> diff --git sys/net/rtsock.c sys/net/rtsock.c
> index 46150c6..a40c6c8 100644
> --- sys/net/rtsock.c
> +++ sys/net/rtsock.c
> @@ -296,6 +296,7 @@ route_ctloutput(int op, struct socket *so, int level,
> int optname,
>       return (error);
>  }
>
> +/* XXXSMP */
>  void
>  rt_senddesync(void *data)
>  {
> diff --git sys/netinet/if_ether.c sys/netinet/if_ether.c
> index da076c2..9f43add 100644
> --- sys/netinet/if_ether.c
> +++ sys/netinet/if_ether.c
> @@ -110,10 +110,10 @@ void
>  arptimer(void *arg)
>  {
>       struct timeout *to = (struct timeout *)arg;
> -     int s;
>       struct llinfo_arp *la, *nla;
> +     int s;
>
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       timeout_add_sec(to, arpt_prune);
>       LIST_FOREACH_SAFE(la, &arp_list, la_list, nla) {
>               struct rtentry *rt = la->la_rt;
> @@ -121,7 +121,7 @@ arptimer(void *arg)
>               if (rt->rt_expire && rt->rt_expire <= time_uptime)
>                       arptfree(rt); /* timer has expired; clear */
>       }
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>  }
>
>  void
> @@ -138,7 +138,7 @@ arp_rtrequest(struct ifnet *ifp, int req, struct rtentry
> *rt)
>               pool_init(&arp_pool, sizeof(struct llinfo_arp), 0,
>                   IPL_SOFTNET, 0, "arp", NULL);
>
> -             timeout_set(&arptimer_to, arptimer, &arptimer_to);
> +             timeout_set_proc(&arptimer_to, arptimer, &arptimer_to);
>               timeout_add_sec(&arptimer_to, 1);
>       }
>
> diff --git sys/netinet/ip_carp.c sys/netinet/ip_carp.c
> index ff3ae78..1c4aa86 100644
> --- sys/netinet/ip_carp.c
> +++ sys/netinet/ip_carp.c
> @@ -1045,7 +1045,7 @@ carp_send_ad(void *v)
>               return;
>       }
>
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>
>       /* bow out if we've gone to backup (the carp interface is going down) */
>       if (sc->sc_bow_out) {
> @@ -1246,7 +1246,7 @@ carp_send_ad(void *v)
>
>  retry_later:
>       sc->cur_vhe = NULL;
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>       if (advbase != 255 || advskew != 255)
>               timeout_add(&vhe->ad_tmo, tvtohz(&tv));
>  }
> diff --git sys/netinet/ip_icmp.c sys/netinet/ip_icmp.c
> index cdd60aa..57d4553 100644
> --- sys/netinet/ip_icmp.c
> +++ sys/netinet/ip_icmp.c
> @@ -884,7 +884,7 @@ icmp_sysctl(int *name, u_int namelen, void *oldp, size_t
> *oldlenp, void *newp,
>       if (namelen != 1)
>               return (ENOTDIR);
>
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       switch (name[0]) {
>       case ICMPCTL_REDIRTIMEOUT:
>
> @@ -921,7 +921,7 @@ icmp_sysctl(int *name, u_int namelen, void *oldp, size_t
> *oldlenp, void *newp,
>               error = ENOPROTOOPT;
>               break;
>       }
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>
>       return (error);
>  }
> @@ -1046,7 +1046,8 @@ void
>  icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
>  {
>       struct ifnet *ifp;
> -     int s;
> +
> +     SOCKET_ASSERT_LOCKED();
>
>       ifp = if_get(rt->rt_ifidx);
>       if (ifp == NULL)
> @@ -1058,7 +1059,6 @@ icmp_mtudisc_timeout(struct rtentry *rt, struct
> rttimer *r)
>
>               sin = *satosin(rt_key(rt));
>
> -             s = splsoftnet();
>               rtdeletemsg(rt, ifp, r->rtt_tableid);
>
>               /* Notify TCP layer of increased Path MTU estimate */
> @@ -1066,7 +1066,6 @@ icmp_mtudisc_timeout(struct rtentry *rt, struct
> rttimer *r)
>               if (ctlfunc)
>                       (*ctlfunc)(PRC_MTUINC, sintosa(&sin),
>                           r->rtt_tableid, NULL);
> -             splx(s);
>       } else {
>               if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
>                       rt->rt_rmx.rmx_mtu = 0;
> @@ -1097,17 +1096,15 @@ void
>  icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r)
>  {
>       struct ifnet *ifp;
> -     int s;
> +
> +     SOCKET_ASSERT_LOCKED();
>
>       ifp = if_get(rt->rt_ifidx);
>       if (ifp == NULL)
>               return;
>
> -     if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
> -             s = splsoftnet();
> +     if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST))
>               rtdeletemsg(rt, ifp, r->rtt_tableid);
> -             splx(s);
> -     }
>
>       if_put(ifp);
>  }
> diff --git sys/netinet/ip_input.c sys/netinet/ip_input.c
> index 7936492..dc10925 100644
> --- sys/netinet/ip_input.c
> +++ sys/netinet/ip_input.c
> @@ -1601,20 +1601,20 @@ ip_sysctl(int *name, u_int namelen, void *oldp,
> size_t *oldlenp, void *newp,
>                       ip_mtudisc_timeout_q =
>                           rt_timer_queue_create(ip_mtudisc_timeout);
>               } else if (ip_mtudisc == 0 && ip_mtudisc_timeout_q != NULL) {
> -                     s = splsoftnet();
> +                     SOCKET_LOCK(s);
>                       rt_timer_queue_destroy(ip_mtudisc_timeout_q);
>                       ip_mtudisc_timeout_q = NULL;
> -                     splx(s);
> +                     SOCKET_UNLOCK(s);
>               }
>               return error;
>       case IPCTL_MTUDISCTIMEOUT:
>               error = sysctl_int(oldp, oldlenp, newp, newlen,
>                  &ip_mtudisc_timeout);
>               if (ip_mtudisc_timeout_q != NULL) {
> -                     s = splsoftnet();
> +                     SOCKET_LOCK(s);
>                       rt_timer_queue_change(ip_mtudisc_timeout_q,
>                                             ip_mtudisc_timeout);
> -                     splx(s);
> +                     SOCKET_UNLOCK(s);
>               }
>               return (error);
>       case IPCTL_IPSEC_ENC_ALGORITHM:
> @@ -1755,12 +1755,15 @@ ip_send_dispatch(void *xmq)
>       int s;
>
>       mq_delist(mq, &ml);
> +     if (ml_empty(&ml))
> +             return;
> +
>       KERNEL_LOCK();
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       while ((m = ml_dequeue(&ml)) != NULL) {
>               ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
>       }
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>       KERNEL_UNLOCK();
>  }
>
> diff --git sys/netinet/ip_output.c sys/netinet/ip_output.c
> index 2c0f416..58a31cd 100644
> --- sys/netinet/ip_output.c
> +++ sys/netinet/ip_output.c
> @@ -109,6 +109,8 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
>       int rv;
>  #endif
>
> +     SOCKET_ASSERT_LOCKED();
> +
>  #ifdef IPSEC
>       if (inp && (inp->inp_flags & INP_IPV6) != 0)
>               panic("ip_output: IPv6 pcb is passed");
> diff --git sys/netinet/tcp_input.c sys/netinet/tcp_input.c
> index 2d06f54..8668f15 100644
> --- sys/netinet/tcp_input.c
> +++ sys/netinet/tcp_input.c
> @@ -3522,11 +3522,9 @@ syn_cache_timer(void *arg)
>       struct syn_cache *sc = arg;
>       int s;
>
> -     s = splsoftnet();
> -     if (sc->sc_flags & SCF_DEAD) {
> -             splx(s);
> -             return;
> -     }
> +     SOCKET_LOCK(s);
> +     if (sc->sc_flags & SCF_DEAD)
> +             goto out;
>
>       if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) {
>               /* Drop it -- too many retransmissions. */
> @@ -3549,14 +3547,15 @@ syn_cache_timer(void *arg)
>       sc->sc_rxtshift++;
>       SYN_CACHE_TIMER_ARM(sc);
>
> -     splx(s);
> + out:
> +     SOCKET_UNLOCK(s);
>       return;
>
>   dropit:
>       tcpstat.tcps_sc_timed_out++;
>       syn_cache_rm(sc);
>       syn_cache_put(sc);
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>  }
>
>  void
> diff --git sys/netinet/tcp_timer.c sys/netinet/tcp_timer.c
> index 6f4f07e..6ef40fd 100644
> --- sys/netinet/tcp_timer.c
> +++ sys/netinet/tcp_timer.c
> @@ -112,15 +112,13 @@ tcp_delack(void *arg)
>        * for whatever reason, it will restart the delayed
>        * ACK callout.
>        */
> -
> -     s = splsoftnet();
> -     if (tp->t_flags & TF_DEAD) {
> -             splx(s);
> -             return;
> -     }
> +     SOCKET_LOCK(s);
> +     if (tp->t_flags & TF_DEAD)
> +             goto out;
>       tp->t_flags |= TF_ACKNOW;
>       (void) tcp_output(tp);
> -     splx(s);
> + out:
> +     SOCKET_UNLOCK(s);
>  }
>
>  /*
> @@ -193,11 +191,9 @@ tcp_timer_rexmt(void *arg)
>       uint32_t rto;
>       int s;
>
> -     s = splsoftnet();
> -     if (tp->t_flags & TF_DEAD) {
> -             splx(s);
> -             return;
> -     }
> +     SOCKET_LOCK(s);
> +     if (tp->t_flags & TF_DEAD)
> +             goto out;
>
>       if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
>           SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
> @@ -224,8 +220,7 @@ tcp_timer_rexmt(void *arg)
>               sin.sin_addr = tp->t_inpcb->inp_faddr;
>               in_pcbnotifyall(&tcbtable, sintosa(&sin),
>                   tp->t_inpcb->inp_rtableid, EMSGSIZE, tcp_mtudisc);
> -             splx(s);
> -             return;
> +             goto out;
>       }
>
>  #ifdef TCP_SACK
> @@ -376,7 +371,7 @@ tcp_timer_rexmt(void *arg)
>       (void) tcp_output(tp);
>
>   out:
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>  }
>
>  void
> @@ -386,11 +381,10 @@ tcp_timer_persist(void *arg)
>       uint32_t rto;
>       int s;
>
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       if ((tp->t_flags & TF_DEAD) ||
>              TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
> -             splx(s);
> -             return;
> +             goto out;
>       }
>       tcpstat.tcps_persisttimeo++;
>       /*
> @@ -415,7 +409,7 @@ tcp_timer_persist(void *arg)
>       (void) tcp_output(tp);
>       tp->t_force = 0;
>   out:
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>  }
>
>  void
> @@ -424,11 +418,9 @@ tcp_timer_keep(void *arg)
>       struct tcpcb *tp = arg;
>       int s;
>
> -     s = splsoftnet();
> -     if (tp->t_flags & TF_DEAD) {
> -             splx(s);
> -             return;
> -     }
> +     SOCKET_LOCK(s);
> +     if (tp->t_flags & TF_DEAD)
> +             goto out;
>
>       tcpstat.tcps_keeptimeo++;
>       if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
> @@ -457,15 +449,14 @@ tcp_timer_keep(void *arg)
>               TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
>       } else
>               TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
> -
> -     splx(s);
> + out:
> +     SOCKET_UNLOCK(s);
>       return;
>
>   dropit:
>       tcpstat.tcps_keepdrops++;
>       tp = tcp_drop(tp, ETIMEDOUT);
> -
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>  }
>
>  void
> @@ -474,11 +465,9 @@ tcp_timer_2msl(void *arg)
>       struct tcpcb *tp = arg;
>       int s;
>
> -     s = splsoftnet();
> -     if (tp->t_flags & TF_DEAD) {
> -             splx(s);
> -             return;
> -     }
> +     SOCKET_LOCK(s);
> +     if (tp->t_flags & TF_DEAD)
> +             goto out;
>
>  #ifdef TCP_SACK
>       tcp_timer_freesack(tp);
> @@ -490,5 +479,6 @@ tcp_timer_2msl(void *arg)
>       else
>               tp = tcp_close(tp);
>
> -     splx(s);
> + out:
> +     SOCKET_UNLOCK(s);
>  }
> diff --git sys/netinet6/icmp6.c sys/netinet6/icmp6.c
> index c918004..2abbc12 100644
> --- sys/netinet6/icmp6.c
> +++ sys/netinet6/icmp6.c
> @@ -1914,17 +1914,14 @@ icmp6_mtudisc_clone(struct sockaddr *dst, u_int rdomain)
>       if ((rt->rt_flags & RTF_HOST) == 0) {
>               struct rt_addrinfo info;
>               struct rtentry *nrt;
> -             int s;
>
>               bzero(&info, sizeof(info));
>               info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC;
>               info.rti_info[RTAX_DST] = dst;
>               info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
>
> -             s = splsoftnet();
>               error = rtrequest(RTM_ADD, &info, rt->rt_priority, &nrt,
>                   rdomain);
> -             splx(s);
>               if (error) {
>                       rtfree(rt);
>                       return NULL;
> @@ -1947,16 +1944,15 @@ void
>  icmp6_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
>  {
>       struct ifnet *ifp;
> -     int s;
> +
> +     SOCKET_ASSERT_LOCKED();
>
>       ifp = if_get(rt->rt_ifidx);
>       if (ifp == NULL)
>               return;
>
>       if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
> -             s = splsoftnet();
>               rtdeletemsg(rt, ifp, r->rtt_tableid);
> -             splx(s);
>       } else {
>               if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
>                       rt->rt_rmx.rmx_mtu = 0;
> @@ -1969,17 +1965,15 @@ void
>  icmp6_redirect_timeout(struct rtentry *rt, struct rttimer *r)
>  {
>       struct ifnet *ifp;
> -     int s;
> +
> +     SOCKET_ASSERT_LOCKED();
>
>       ifp = if_get(rt->rt_ifidx);
>       if (ifp == NULL)
>               return;
>
> -     if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
> -             s = splsoftnet();
> +     if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST))
>               rtdeletemsg(rt, ifp, r->rtt_tableid);
> -             splx(s);
> -     }
>
>       if_put(ifp);
>  }
> diff --git sys/netinet6/ip6_input.c sys/netinet6/ip6_input.c
> index 9ac2555..aed3ebd 100644
> --- sys/netinet6/ip6_input.c
> +++ sys/netinet6/ip6_input.c
> @@ -1429,12 +1429,15 @@ ip6_send_dispatch(void *xmq)
>       int s;
>
>       mq_delist(mq, &ml);
> +     if (ml_empty(&ml))
> +             return;
> +
>       KERNEL_LOCK();
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>       while ((m = ml_dequeue(&ml)) != NULL) {
>               ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
>       }
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>       KERNEL_UNLOCK();
>  }
>
> diff --git sys/netinet6/nd6.c sys/netinet6/nd6.c
> index 34c8d9c..66e6068 100644
> --- sys/netinet6/nd6.c
> +++ sys/netinet6/nd6.c
> @@ -308,10 +308,6 @@ skip1:
>  void
>  nd6_llinfo_settimer(struct llinfo_nd6 *ln, int secs)
>  {
> -     int s;
> -
> -     s = splsoftnet();
> -
>       if (secs < 0) {
>               ln->ln_rt->rt_expire = 0;
>               timeout_del(&ln->ln_timer_ch);
> @@ -319,8 +315,6 @@ nd6_llinfo_settimer(struct llinfo_nd6 *ln, int secs)
>               ln->ln_rt->rt_expire = time_uptime + secs;
>               timeout_add_sec(&ln->ln_timer_ch, secs);
>       }
> -
> -     splx(s);
>  }
>
>  void
> @@ -333,14 +327,14 @@ nd6_llinfo_timer(void *arg)
>       struct ifnet *ifp;
>       struct nd_ifinfo *ndi = NULL;
>
> -     s = splsoftnet();
> +     SOCKET_LOCK(s);
>
>       ln = (struct llinfo_nd6 *)arg;
>
>       if ((rt = ln->ln_rt) == NULL)
>               panic("ln->ln_rt == NULL");
>       if ((ifp = if_get(rt->rt_ifidx)) == NULL) {
> -             splx(s);
> +             SOCKET_UNLOCK(s);
>               return;
>       }
>       ndi = ND_IFINFO(ifp);
> @@ -427,7 +421,7 @@ nd6_llinfo_timer(void *arg)
>       }
>
>       if_put(ifp);
> -     splx(s);
> +     SOCKET_UNLOCK(s);
>  }
>
>  /*
> @@ -989,7 +983,7 @@ nd6_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
>               nd6_inuse++;
>               nd6_allocated++;
>               ln->ln_rt = rt;
> -             timeout_set(&ln->ln_timer_ch, nd6_llinfo_timer, ln);
> +             timeout_set_proc(&ln->ln_timer_ch, nd6_llinfo_timer, ln);
>               /* this is required for "ndp" command. - shin */
>               if (req == RTM_ADD) {
>                       /*
> diff --git sys/sys/systm.h sys/sys/systm.h
> index 5ef388b..56d57d3 100644
> --- sys/sys/systm.h
> +++ sys/sys/systm.h
> @@ -290,6 +290,31 @@ struct uio;
>  int  uiomove(void *, size_t, struct uio *);
>
>  #if defined(_KERNEL)
> +/*
> + * Serialize socket operations to ensure that code paths that were
> + * atomically executed stay atomic until we turn them mpsafe.
> + */
> +extern struct rwlock socketlock;
> +
> +#define      SOCKET_LOCK(s)                                                  \
> +do {                                                                 \
> +     rw_enter_write(&socketlock);                                    \
> +     s = splsoftnet();                                               \
> +} while (/* CONSTCOND */ 0)
> +
> +#define      SOCKET_UNLOCK(s)                                                \
> +do {                                                                 \
> +     splx(s);                                                        \
> +     rw_exit_write(&socketlock);                                     \
> +} while (/* CONSTCOND */ 0)
> +
> +#define      SOCKET_ASSERT_LOCKED()                                          \
> +do {                                                                 \
> +     if (rw_status(&socketlock) != RW_WRITE)                         \
> +             splassert_fail(RW_WRITE, rw_status(&socketlock), __func__);\
> +     splsoftassert(IPL_SOFTNET);                                     \
> +} while (0)
> +
>  __returns_twice int  setjmp(label_t *);
>  __dead void  longjmp(label_t *);
>  #endif
>
>

Reply via email to