Hi, I did some performance measurements with and without your diff on OpenBSD-current.
There is no performance difference. I think this is the expected outcome. BR Simon 2016-11-04 10:12 GMT+01:00, Martin Pieuchot <[email protected]>: > On 03/11/16(Thu) 11:21, Martin Pieuchot wrote: >> Here's the next iteration of my diff introducing a rwlock to serialize >> the network input path with socket paths. Changes are: >> >> - more timeout_set_proc() that should fix problems reported by >> Chris Jackman. >> >> - I introduced a set of macro to make it easier to audit existing >> splsoftnet(). >> >> - It makes use of splassert_fail() if the lock is not held. >> >> >> My plan is to commit it, assuming it is stable enough, then fix the >> remaining issues in tree. This includes: >> >> - Analyze and if needed fix the two code paths were we do an >> unlock/lock >> dance >> >> - Remove unneeded/recursive splsoftnet() dances. >> >> Once that's done we should be able to remove the KERNEL_LOCK() from the >> input path. >> >> So please test and report back. > > Updated version that prevents a recursion in doaccept(), reported by Nils > Frohberg. > > diff --git sys/kern/sys_socket.c sys/kern/sys_socket.c > index 7a90f78..a7be8a1 100644 > --- sys/kern/sys_socket.c > +++ sys/kern/sys_socket.c > @@ -133,7 +133,7 @@ soo_poll(struct file *fp, int events, struct proc *p) > int revents = 0; > int s; > > - s = splsoftnet(); > + SOCKET_LOCK(s); > if (events & (POLLIN | POLLRDNORM)) { > if (soreadable(so)) > revents |= events & (POLLIN | POLLRDNORM); > @@ -159,7 +159,7 @@ soo_poll(struct file *fp, int events, struct proc *p) > so->so_snd.sb_flagsintr |= SB_SEL; > } > } > - splx(s); > + SOCKET_UNLOCK(s); > return (revents); > } > > diff --git sys/kern/uipc_socket.c sys/kern/uipc_socket.c > index 9e8d05f..dd067b3 100644 > --- sys/kern/uipc_socket.c > +++ sys/kern/uipc_socket.c > @@ -89,6 +89,11 @@ struct pool sosplice_pool; > struct taskq *sosplice_taskq; > #endif > > +/* > + * Serialize socket operations. 
> + */ > +struct rwlock socketlock = RWLOCK_INITIALIZER("socketlock"); > + > void > soinit(void) > { > @@ -123,7 +128,7 @@ socreate(int dom, struct socket **aso, int type, int > proto) > return (EPROTONOSUPPORT); > if (prp->pr_type != type) > return (EPROTOTYPE); > - s = splsoftnet(); > + SOCKET_LOCK(s); > so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO); > TAILQ_INIT(&so->so_q0); > TAILQ_INIT(&so->so_q); > @@ -141,10 +146,10 @@ socreate(int dom, struct socket **aso, int type, int > proto) > if (error) { > so->so_state |= SS_NOFDREF; > sofree(so); > - splx(s); > + SOCKET_UNLOCK(s); > return (error); > } > - splx(s); > + SOCKET_UNLOCK(s); > *aso = so; > return (0); > } > @@ -154,9 +159,9 @@ sobind(struct socket *so, struct mbuf *nam, struct proc > *p) > { > int s, error; > > - s = splsoftnet(); > + SOCKET_LOCK(s); > error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p); > - splx(s); > + SOCKET_UNLOCK(s); > return (error); > } > > @@ -171,11 +176,11 @@ solisten(struct socket *so, int backlog) > if (isspliced(so) || issplicedback(so)) > return (EOPNOTSUPP); > #endif /* SOCKET_SPLICE */ > - s = splsoftnet(); > + SOCKET_LOCK(s); > error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL, > curproc); > if (error) { > - splx(s); > + SOCKET_UNLOCK(s); > return (error); > } > if (TAILQ_FIRST(&so->so_q) == NULL) > @@ -185,14 +190,14 @@ solisten(struct socket *so, int backlog) > if (backlog < sominconn) > backlog = sominconn; > so->so_qlimit = backlog; > - splx(s); > + SOCKET_UNLOCK(s); > return (0); > } > > void > sofree(struct socket *so) > { > - splsoftassert(IPL_SOFTNET); > + SOCKET_ASSERT_LOCKED(); > > if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) > return; > @@ -232,7 +237,7 @@ soclose(struct socket *so) > struct socket *so2; > int s, error = 0; > > - s = splsoftnet(); > + SOCKET_LOCK(s); > if (so->so_options & SO_ACCEPTCONN) { > while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) { > (void) soqremque(so2, 0); > @@ -256,7 +261,7 @@ 
soclose(struct socket *so) > (so->so_state & SS_NBIO)) > goto drop; > while (so->so_state & SS_ISCONNECTED) { > - error = tsleep(&so->so_timeo, > + error = rwsleep(&so->so_timeo, &socketlock, > PSOCK | PCATCH, "netcls", > so->so_linger * hz); > if (error) > @@ -276,14 +281,14 @@ discard: > panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type); > so->so_state |= SS_NOFDREF; > sofree(so); > - splx(s); > + SOCKET_UNLOCK(s); > return (error); > } > > int > soabort(struct socket *so) > { > - splsoftassert(IPL_SOFTNET); > + SOCKET_ASSERT_LOCKED(); > > return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL, > curproc); > @@ -294,7 +299,7 @@ soaccept(struct socket *so, struct mbuf *nam) > { > int error = 0; > > - splsoftassert(IPL_SOFTNET); > + SOCKET_ASSERT_LOCKED(); > > if ((so->so_state & SS_NOFDREF) == 0) > panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type); > @@ -315,7 +320,7 @@ soconnect(struct socket *so, struct mbuf *nam) > > if (so->so_options & SO_ACCEPTCONN) > return (EOPNOTSUPP); > - s = splsoftnet(); > + SOCKET_LOCK(s); > /* > * If protocol is connection-based, can only connect once. > * Otherwise, if connected, try to disconnect first. 
> @@ -329,7 +334,7 @@ soconnect(struct socket *so, struct mbuf *nam) > else > error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, > NULL, nam, NULL, curproc); > - splx(s); > + SOCKET_UNLOCK(s); > return (error); > } > > @@ -338,10 +343,10 @@ soconnect2(struct socket *so1, struct socket *so2) > { > int s, error; > > - s = splsoftnet(); > + SOCKET_LOCK(s); > error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL, > (struct mbuf *)so2, NULL, curproc); > - splx(s); > + SOCKET_UNLOCK(s); > return (error); > } > > @@ -350,7 +355,7 @@ sodisconnect(struct socket *so) > { > int error; > > - splsoftassert(IPL_SOFTNET); > + SOCKET_ASSERT_LOCKED(); > > if ((so->so_state & SS_ISCONNECTED) == 0) > return (ENOTCONN); > @@ -418,21 +423,20 @@ sosend(struct socket *so, struct mbuf *addr, struct > uio *uio, struct mbuf *top, > (sizeof(struct file *) / sizeof(int))); > } > > -#define snderr(errno) { error = errno; splx(s); goto release; } > +#define snderr(errno) { error = errno; SOCKET_UNLOCK(s); goto > release; } > > restart: > if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) > goto out; > so->so_state |= SS_ISSENDING; > do { > - s = splsoftnet(); > + SOCKET_LOCK(s); > if (so->so_state & SS_CANTSENDMORE) > snderr(EPIPE); > if (so->so_error) { > error = so->so_error; > so->so_error = 0; > - splx(s); > - goto release; > + snderr(error); > } > if ((so->so_state & SS_ISCONNECTED) == 0) { > if (so->so_proto->pr_flags & PR_CONNREQUIRED) { > @@ -456,12 +460,12 @@ restart: > sbunlock(&so->so_snd); > error = sbwait(&so->so_snd); > so->so_state &= ~SS_ISSENDING; > - splx(s); > + SOCKET_UNLOCK(s); > if (error) > goto out; > goto restart; > } > - splx(s); > + SOCKET_UNLOCK(s); > space -= clen; > do { > if (uio == NULL) { > @@ -481,13 +485,13 @@ restart: > if (flags & MSG_EOR) > top->m_flags |= M_EOR; > } > - s = splsoftnet(); /* XXX */ > + SOCKET_LOCK(s); > if (resid == 0) > so->so_state &= ~SS_ISSENDING; > error = (*so->so_proto->pr_usrreq)(so, > (flags & MSG_OOB) ? 
PRU_SENDOOB : PRU_SEND, > top, addr, control, curproc); > - splx(s); > + SOCKET_UNLOCK(s); > clen = 0; > control = NULL; > top = NULL; > @@ -617,8 +621,8 @@ sbsync(struct sockbuf *sb, struct mbuf *nextrecord) > * must begin with an address if the protocol so specifies, > * followed by an optional mbuf or mbufs containing ancillary data, > * and then zero or more mbufs of data. > - * In order to avoid blocking network interrupts for the entire time here, > - * we splx() while doing the actual copy to user space. > + * In order to avoid blocking network for the entire time here, we splx() > + * and release ``socketlock'' while doing the actual copy to user space. > * Although the sockbuf is locked, new data may still be appended, > * and thus we must maintain consistency of the sockbuf during that time. > * > @@ -672,7 +676,7 @@ bad: > restart: > if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) > return (error); > - s = splsoftnet(); > + SOCKET_LOCK(s); > > m = so->so_rcv.sb_mb; > #ifdef SOCKET_SPLICE > @@ -737,7 +741,7 @@ restart: > SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); > sbunlock(&so->so_rcv); > error = sbwait(&so->so_rcv); > - splx(s); > + SOCKET_UNLOCK(s); > if (error) > return (error); > goto restart; > @@ -871,9 +875,9 @@ dontblock: > SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); > SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); > resid = uio->uio_resid; > - splx(s); > + SOCKET_UNLOCK(s); > uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio); > - s = splsoftnet(); > + SOCKET_LOCK(s); > if (uio_error) > uio->uio_resid = resid - len; > } else > @@ -955,7 +959,7 @@ dontblock: > error = sbwait(&so->so_rcv); > if (error) { > sbunlock(&so->so_rcv); > - splx(s); > + SOCKET_UNLOCK(s); > return (0); > } > if ((m = so->so_rcv.sb_mb) != NULL) > @@ -991,7 +995,7 @@ dontblock: > if (orig_resid == uio->uio_resid && orig_resid && > (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { > sbunlock(&so->so_rcv); > - splx(s); > + 
SOCKET_UNLOCK(s); > goto restart; > } > > @@ -1002,7 +1006,7 @@ dontblock: > *flagsp |= flags; > release: > sbunlock(&so->so_rcv); > - splx(s); > + SOCKET_UNLOCK(s); > return (error); > } > > @@ -1012,7 +1016,7 @@ soshutdown(struct socket *so, int how) > struct protosw *pr = so->so_proto; > int s, error = 0; > > - s = splsoftnet(); > + SOCKET_LOCK(s); > switch (how) { > case SHUT_RD: > case SHUT_RDWR: > @@ -1028,7 +1032,8 @@ soshutdown(struct socket *so, int how) > error = EINVAL; > break; > } > - splx(s); > + SOCKET_UNLOCK(s); > + > return (error); > } > > @@ -1042,6 +1047,7 @@ sorflush(struct socket *so) > > sb->sb_flags |= SB_NOINTR; > (void) sblock(sb, M_WAITOK); > + /* XXXSMP */ > s = splnet(); > socantrcvmore(so); > sbunlock(sb); > @@ -1095,10 +1101,10 @@ sosplice(struct socket *so, int fd, off_t max, > struct timeval *tv) > if ((error = sblock(&so->so_rcv, > (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0) > return (error); > - s = splsoftnet(); > + SOCKET_LOCK(s); > if (so->so_sp->ssp_socket) > sounsplice(so, so->so_sp->ssp_socket, 1); > - splx(s); > + SOCKET_UNLOCK(s); > sbunlock(&so->so_rcv); > return (0); > } > @@ -1127,7 +1133,7 @@ sosplice(struct socket *so, int fd, off_t max, struct > timeval *tv) > FRELE(fp, curproc); > return (error); > } > - s = splsoftnet(); > + SOCKET_LOCK(s); > > if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) { > error = EBUSY; > @@ -1168,7 +1174,7 @@ sosplice(struct socket *so, int fd, off_t max, struct > timeval *tv) > } > > release: > - splx(s); > + SOCKET_UNLOCK(s); > sbunlock(&sosp->so_snd); > sbunlock(&so->so_rcv); > FRELE(fp, curproc); > @@ -1178,7 +1184,7 @@ sosplice(struct socket *so, int fd, off_t max, struct > timeval *tv) > void > sounsplice(struct socket *so, struct socket *sosp, int wakeup) > { > - splsoftassert(IPL_SOFTNET); > + SOCKET_ASSERT_LOCKED(); > > task_del(sosplice_taskq, &so->so_splicetask); > timeout_del(&so->so_idleto); > @@ -1195,12 +1201,12 @@ soidle(void *arg) > struct socket *so = arg; 
> int s; > > - s = splsoftnet(); > + SOCKET_LOCK(s); > if (so->so_rcv.sb_flagsintr & SB_SPLICE) { > so->so_error = ETIMEDOUT; > sounsplice(so, so->so_sp->ssp_socket, 1); > } > - splx(s); > + SOCKET_UNLOCK(s); > } > > void > @@ -1209,7 +1215,7 @@ sotask(void *arg) > struct socket *so = arg; > int s; > > - s = splsoftnet(); > + SOCKET_LOCK(s); > if (so->so_rcv.sb_flagsintr & SB_SPLICE) { > /* > * We may not sleep here as sofree() and unsplice() may be > @@ -1218,7 +1224,7 @@ sotask(void *arg) > */ > somove(so, M_DONTWAIT); > } > - splx(s); > + SOCKET_UNLOCK(s); > > /* Avoid user land starvation. */ > yield(); > @@ -1240,7 +1246,7 @@ somove(struct socket *so, int wait) > int error = 0, maxreached = 0; > short state; > > - splsoftassert(IPL_SOFTNET); > + SOCKET_ASSERT_LOCKED(); > > nextpkt: > if (so->so_error) { > @@ -1502,7 +1508,7 @@ somove(struct socket *so, int wait) > void > sorwakeup(struct socket *so) > { > - splsoftassert(IPL_SOFTNET); > + SOCKET_ASSERT_LOCKED(); > > #ifdef SOCKET_SPLICE > if (so->so_rcv.sb_flagsintr & SB_SPLICE) { > @@ -1523,14 +1529,18 @@ sorwakeup(struct socket *so) > return; > #endif > sowakeup(so, &so->so_rcv); > - if (so->so_upcall) > + if (so->so_upcall) { > + /* XXXSMP breaks atomicity */ > + rw_exit_write(&socketlock); > (*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT); > + rw_enter_write(&socketlock); > + } > } > > void > sowwakeup(struct socket *so) > { > - splsoftassert(IPL_SOFTNET); > + SOCKET_ASSERT_LOCKED(); > > #ifdef SOCKET_SPLICE > if (so->so_snd.sb_flagsintr & SB_SPLICE) > @@ -1876,7 +1886,8 @@ soo_kqfilter(struct file *fp, struct knote *kn) > { > struct socket *so = kn->kn_fp->f_data; > struct sockbuf *sb; > - int s; > + > + KERNEL_ASSERT_LOCKED(); > > switch (kn->kn_filter) { > case EVFILT_READ: > @@ -1894,10 +1905,9 @@ soo_kqfilter(struct file *fp, struct knote *kn) > return (EINVAL); > } > > - s = splnet(); > SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext); > sb->sb_flags |= SB_KNOTE; > - splx(s); > + > return 
(0); > } > > @@ -1905,12 +1915,12 @@ void > filt_sordetach(struct knote *kn) > { > struct socket *so = kn->kn_fp->f_data; > - int s = splnet(); > + > + KERNEL_ASSERT_LOCKED(); > > SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext); > if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note)) > so->so_rcv.sb_flags &= ~SB_KNOTE; > - splx(s); > } > > int > @@ -1939,12 +1949,12 @@ void > filt_sowdetach(struct knote *kn) > { > struct socket *so = kn->kn_fp->f_data; > - int s = splnet(); > + > + KERNEL_ASSERT_LOCKED(); > > SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext); > if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note)) > so->so_snd.sb_flags &= ~SB_KNOTE; > - splx(s); > } > > int > diff --git sys/kern/uipc_socket2.c sys/kern/uipc_socket2.c > index c3b7c3a..ed9fa6f 100644 > --- sys/kern/uipc_socket2.c > +++ sys/kern/uipc_socket2.c > @@ -145,7 +145,7 @@ sonewconn(struct socket *head, int connstatus) > struct socket *so; > int soqueue = connstatus ? 1 : 0; > > - splsoftassert(IPL_SOFTNET); > + SOCKET_ASSERT_LOCKED(); > > if (mclpools[0].pr_nout > mclpools[0].pr_hardlimit * 95 / 100) > return (NULL); > @@ -274,10 +274,10 @@ socantrcvmore(struct socket *so) > int > sbwait(struct sockbuf *sb) > { > - splsoftassert(IPL_SOFTNET); > + SOCKET_ASSERT_LOCKED(); > > sb->sb_flagsintr |= SB_WAIT; > - return (tsleep(&sb->sb_cc, > + return (rwsleep(&sb->sb_cc, &socketlock, > (sb->sb_flags & SB_NOINTR) ? 
PSOCK : PSOCK | PCATCH, "netio", > sb->sb_timeo)); > } > @@ -315,7 +315,7 @@ sbunlock(struct sockbuf *sb) > void > sowakeup(struct socket *so, struct sockbuf *sb) > { > - splsoftassert(IPL_SOFTNET); > + SOCKET_ASSERT_LOCKED(); > > selwakeup(&sb->sb_sel); > sb->sb_flagsintr &= ~SB_SEL; > diff --git sys/kern/uipc_syscalls.c sys/kern/uipc_syscalls.c > index e064bc9..fd54e70 100644 > --- sys/kern/uipc_syscalls.c > +++ sys/kern/uipc_syscalls.c > @@ -276,16 +276,11 @@ doaccept(struct proc *p, int sock, struct sockaddr > *name, socklen_t *anamelen, > if ((error = getsock(p, sock, &fp)) != 0) > return (error); > > - s = splsoftnet(); > headfp = fp; > - head = fp->f_data; > - > - if (isdnssocket((struct socket *)fp->f_data)) { > - error = EINVAL; > - goto bad; > - } > redo: > - if ((head->so_options & SO_ACCEPTCONN) == 0) { > + SOCKET_LOCK(s); > + head = headfp->f_data; > + if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) { > error = EINVAL; > goto bad; > } > @@ -301,7 +296,8 @@ redo: > head->so_error = ECONNABORTED; > break; > } > - error = tsleep(&head->so_timeo, PSOCK | PCATCH, "netcon", 0); > + error = rwsleep(&head->so_timeo, &socketlock, PSOCK | PCATCH, > + "netcon", 0); > if (error) { > goto bad; > } > @@ -311,7 +307,7 @@ redo: > head->so_error = 0; > goto bad; > } > - > + > /* Figure out whether the new socket should be non-blocking. */ > nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK) > : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0); > @@ -338,6 +334,7 @@ redo: > * or another thread or process to accept it. If so, start over. 
> */ > if (head->so_qlen == 0) { > + SOCKET_UNLOCK(s); > m_freem(nam); > fdplock(fdp); > fdremove(fdp, tmpfd); > @@ -366,18 +363,23 @@ redo: > > if (error) { > /* if an error occurred, free the file descriptor */ > + SOCKET_UNLOCK(s); > + m_freem(nam); > fdplock(fdp); > fdremove(fdp, tmpfd); > closef(fp, p); > fdpunlock(fdp); > + goto out; > } else { > (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&nflag, p); > FILE_SET_MATURE(fp, p); > *retval = tmpfd; > + m_freem(nam); > } > - m_freem(nam); > + > bad: > - splx(s); > + SOCKET_UNLOCK(s); > +out: > FRELE(headfp, p); > return (error); > } > @@ -434,9 +436,10 @@ sys_connect(struct proc *p, void *v, register_t > *retval) > m_freem(nam); > return (EINPROGRESS); > } > - s = splsoftnet(); > + SOCKET_LOCK(s); > while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { > - error = tsleep(&so->so_timeo, PSOCK | PCATCH, "netcon2", 0); > + error = rwsleep(&so->so_timeo, &socketlock, PSOCK | PCATCH, > + "netcon2", 0); > if (error) { > if (error == EINTR || error == ERESTART) > interrupted = 1; > @@ -447,7 +450,7 @@ sys_connect(struct proc *p, void *v, register_t > *retval) > error = so->so_error; > so->so_error = 0; > } > - splx(s); > + SOCKET_UNLOCK(s); > bad: > if (!interrupted) > so->so_state &= ~SS_ISCONNECTING; > diff --git sys/kern/uipc_usrreq.c sys/kern/uipc_usrreq.c > index e0f7f27..84bab2f 100644 > --- sys/kern/uipc_usrreq.c > +++ sys/kern/uipc_usrreq.c > @@ -131,7 +131,11 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, > struct mbuf *nam, > break; > > case PRU_BIND: > + /* XXXSMP breaks atomicity */ > + rw_assert_wrlock(&socketlock); > + rw_exit_write(&socketlock); > error = unp_bind(unp, nam, p); > + rw_enter_write(&socketlock); > break; > > case PRU_LISTEN: > diff --git sys/net/if.c sys/net/if.c > index b7c9e11..797344f 100644 > --- sys/net/if.c > +++ sys/net/if.c > @@ -160,7 +160,8 @@ void if_netisr(void *); > void ifa_print_all(void); > #endif > > -void if_start_locked(struct ifnet *ifp); > +void 
if_start_locked(struct ifnet *); > +int if_ioctl_locked(struct socket *, u_long, caddr_t, struct proc *); > > /* > * interface index map > @@ -835,10 +836,15 @@ if_netisr(void *unused) > int s; > > KERNEL_LOCK(); > - s = splsoftnet(); > + SOCKET_LOCK(s); > > while ((n = netisr) != 0) { > - sched_pause(); > + /* Like sched_pause() but with a rwlock dance. */ > + if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) { > + SOCKET_UNLOCK(s); > + yield(); > + SOCKET_LOCK(s); > + } > > atomic_clearbits_int(&netisr, n); > > @@ -876,7 +882,7 @@ if_netisr(void *unused) > pfsyncintr(); > #endif > > - splx(s); > + SOCKET_UNLOCK(s); > KERNEL_UNLOCK(); > } > > @@ -1429,7 +1435,7 @@ if_downall(void) > struct ifnet *ifp; > int s; > > - s = splnet(); > + SOCKET_LOCK(s); > TAILQ_FOREACH(ifp, &ifnet, if_list) { > if ((ifp->if_flags & IFF_UP) == 0) > continue; > @@ -1442,7 +1448,7 @@ if_downall(void) > (caddr_t)&ifrq); > } > } > - splx(s); > + SOCKET_UNLOCK(s); > } > > /* > @@ -1502,9 +1508,9 @@ if_linkstate_task(void *xifidx) > if (ifp == NULL) > return; > > - s = splsoftnet(); > + SOCKET_LOCK(s); > if_linkstate(ifp); > - splx(s); > + SOCKET_UNLOCK(s); > > if_put(ifp); > } > @@ -1512,7 +1518,7 @@ if_linkstate_task(void *xifidx) > void > if_linkstate(struct ifnet *ifp) > { > - splsoftassert(IPL_SOFTNET); > + SOCKET_ASSERT_LOCKED(); > > rt_ifmsg(ifp); > #ifndef SMALL_KERNEL > @@ -1703,6 +1709,18 @@ if_setrdomain(struct ifnet *ifp, int rdomain) > int > ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) > { > + int s, error; > + > + SOCKET_LOCK(s); > + error = if_ioctl_locked(so, cmd, data, p); > + SOCKET_UNLOCK(s); > + > + return (error); > +} > + > +int > +if_ioctl_locked(struct socket *so, u_long cmd, caddr_t data, struct proc > *p) > +{ > struct ifnet *ifp; > struct ifreq *ifr; > struct sockaddr_dl *sdl; > @@ -1751,20 +1769,15 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, > struct proc *p) > switch (ifar->ifar_af) { > case AF_INET: > /* attach is 
a noop for AF_INET */ > - if (cmd == SIOCIFAFDETACH) { > - s = splsoftnet(); > + if (cmd == SIOCIFAFDETACH) > in_ifdetach(ifp); > - splx(s); > - } > return (0); > #ifdef INET6 > case AF_INET6: > - s = splsoftnet(); > if (cmd == SIOCIFAFATTACH) > error = in6_ifattach(ifp); > else > in6_ifdetach(ifp); > - splx(s); > return (error); > #endif /* INET6 */ > default: > diff --git sys/net/route.c sys/net/route.c > index a04b095..ab2b924 100644 > --- sys/net/route.c > +++ sys/net/route.c > @@ -547,7 +547,7 @@ rtredirect(struct sockaddr *dst, struct sockaddr > *gateway, > int flags = RTF_GATEWAY|RTF_HOST; > uint8_t prio = RTP_NONE; > > - splsoftassert(IPL_SOFTNET); > + SOCKET_ASSERT_LOCKED(); > > /* verify the gateway is directly reachable */ > if ((ifa = ifa_ifwithnet(gateway, rdomain)) == NULL) { > @@ -1498,6 +1498,8 @@ rt_timer_queue_destroy(struct rttimer_queue *rtq) > { > struct rttimer *r; > > + SOCKET_ASSERT_LOCKED(); > + > while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) { > LIST_REMOVE(r, rtt_link); > TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next); > @@ -1590,7 +1592,7 @@ rt_timer_timer(void *arg) > > current_time = time_uptime; > > - s = splsoftnet(); > + SOCKET_LOCK(s); > for (rtq = LIST_FIRST(&rttimer_queue_head); rtq != NULL; > rtq = LIST_NEXT(rtq, rtq_link)) { > while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL && > @@ -1605,7 +1607,7 @@ rt_timer_timer(void *arg) > printf("rt_timer_timer: rtq_count reached 0\n"); > } > } > - splx(s); > + SOCKET_UNLOCK(s); > > timeout_add_sec(to, 1); > } > diff --git sys/net/rtsock.c sys/net/rtsock.c > index 46150c6..a40c6c8 100644 > --- sys/net/rtsock.c > +++ sys/net/rtsock.c > @@ -296,6 +296,7 @@ route_ctloutput(int op, struct socket *so, int level, > int optname, > return (error); > } > > +/* XXXSMP */ > void > rt_senddesync(void *data) > { > diff --git sys/netinet/if_ether.c sys/netinet/if_ether.c > index da076c2..9f43add 100644 > --- sys/netinet/if_ether.c > +++ sys/netinet/if_ether.c > @@ -110,10 +110,10 @@ void > 
arptimer(void *arg) > { > struct timeout *to = (struct timeout *)arg; > - int s; > struct llinfo_arp *la, *nla; > + int s; > > - s = splsoftnet(); > + SOCKET_LOCK(s); > timeout_add_sec(to, arpt_prune); > LIST_FOREACH_SAFE(la, &arp_list, la_list, nla) { > struct rtentry *rt = la->la_rt; > @@ -121,7 +121,7 @@ arptimer(void *arg) > if (rt->rt_expire && rt->rt_expire <= time_uptime) > arptfree(rt); /* timer has expired; clear */ > } > - splx(s); > + SOCKET_UNLOCK(s); > } > > void > @@ -138,7 +138,7 @@ arp_rtrequest(struct ifnet *ifp, int req, struct rtentry > *rt) > pool_init(&arp_pool, sizeof(struct llinfo_arp), 0, > IPL_SOFTNET, 0, "arp", NULL); > > - timeout_set(&arptimer_to, arptimer, &arptimer_to); > + timeout_set_proc(&arptimer_to, arptimer, &arptimer_to); > timeout_add_sec(&arptimer_to, 1); > } > > diff --git sys/netinet/ip_carp.c sys/netinet/ip_carp.c > index ff3ae78..1c4aa86 100644 > --- sys/netinet/ip_carp.c > +++ sys/netinet/ip_carp.c > @@ -1045,7 +1045,7 @@ carp_send_ad(void *v) > return; > } > > - s = splsoftnet(); > + SOCKET_LOCK(s); > > /* bow out if we've gone to backup (the carp interface is going down) */ > if (sc->sc_bow_out) { > @@ -1246,7 +1246,7 @@ carp_send_ad(void *v) > > retry_later: > sc->cur_vhe = NULL; > - splx(s); > + SOCKET_UNLOCK(s); > if (advbase != 255 || advskew != 255) > timeout_add(&vhe->ad_tmo, tvtohz(&tv)); > } > diff --git sys/netinet/ip_icmp.c sys/netinet/ip_icmp.c > index cdd60aa..57d4553 100644 > --- sys/netinet/ip_icmp.c > +++ sys/netinet/ip_icmp.c > @@ -884,7 +884,7 @@ icmp_sysctl(int *name, u_int namelen, void *oldp, size_t > *oldlenp, void *newp, > if (namelen != 1) > return (ENOTDIR); > > - s = splsoftnet(); > + SOCKET_LOCK(s); > switch (name[0]) { > case ICMPCTL_REDIRTIMEOUT: > > @@ -921,7 +921,7 @@ icmp_sysctl(int *name, u_int namelen, void *oldp, size_t > *oldlenp, void *newp, > error = ENOPROTOOPT; > break; > } > - splx(s); > + SOCKET_UNLOCK(s); > > return (error); > } > @@ -1046,7 +1046,8 @@ void > 
icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r) > { > struct ifnet *ifp; > - int s; > + > + SOCKET_ASSERT_LOCKED(); > > ifp = if_get(rt->rt_ifidx); > if (ifp == NULL) > @@ -1058,7 +1059,6 @@ icmp_mtudisc_timeout(struct rtentry *rt, struct > rttimer *r) > > sin = *satosin(rt_key(rt)); > > - s = splsoftnet(); > rtdeletemsg(rt, ifp, r->rtt_tableid); > > /* Notify TCP layer of increased Path MTU estimate */ > @@ -1066,7 +1066,6 @@ icmp_mtudisc_timeout(struct rtentry *rt, struct > rttimer *r) > if (ctlfunc) > (*ctlfunc)(PRC_MTUINC, sintosa(&sin), > r->rtt_tableid, NULL); > - splx(s); > } else { > if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) > rt->rt_rmx.rmx_mtu = 0; > @@ -1097,17 +1096,15 @@ void > icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r) > { > struct ifnet *ifp; > - int s; > + > + SOCKET_ASSERT_LOCKED(); > > ifp = if_get(rt->rt_ifidx); > if (ifp == NULL) > return; > > - if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) { > - s = splsoftnet(); > + if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) > rtdeletemsg(rt, ifp, r->rtt_tableid); > - splx(s); > - } > > if_put(ifp); > } > diff --git sys/netinet/ip_input.c sys/netinet/ip_input.c > index 7936492..dc10925 100644 > --- sys/netinet/ip_input.c > +++ sys/netinet/ip_input.c > @@ -1601,20 +1601,20 @@ ip_sysctl(int *name, u_int namelen, void *oldp, > size_t *oldlenp, void *newp, > ip_mtudisc_timeout_q = > rt_timer_queue_create(ip_mtudisc_timeout); > } else if (ip_mtudisc == 0 && ip_mtudisc_timeout_q != NULL) { > - s = splsoftnet(); > + SOCKET_LOCK(s); > rt_timer_queue_destroy(ip_mtudisc_timeout_q); > ip_mtudisc_timeout_q = NULL; > - splx(s); > + SOCKET_UNLOCK(s); > } > return error; > case IPCTL_MTUDISCTIMEOUT: > error = sysctl_int(oldp, oldlenp, newp, newlen, > &ip_mtudisc_timeout); > if (ip_mtudisc_timeout_q != NULL) { > - s = splsoftnet(); > + SOCKET_LOCK(s); > rt_timer_queue_change(ip_mtudisc_timeout_q, > ip_mtudisc_timeout); > - splx(s); > + 
SOCKET_UNLOCK(s); > } > return (error); > case IPCTL_IPSEC_ENC_ALGORITHM: > @@ -1755,12 +1755,15 @@ ip_send_dispatch(void *xmq) > int s; > > mq_delist(mq, &ml); > + if (ml_empty(&ml)) > + return; > + > KERNEL_LOCK(); > - s = splsoftnet(); > + SOCKET_LOCK(s); > while ((m = ml_dequeue(&ml)) != NULL) { > ip_output(m, NULL, NULL, 0, NULL, NULL, 0); > } > - splx(s); > + SOCKET_UNLOCK(s); > KERNEL_UNLOCK(); > } > > diff --git sys/netinet/ip_output.c sys/netinet/ip_output.c > index 2c0f416..58a31cd 100644 > --- sys/netinet/ip_output.c > +++ sys/netinet/ip_output.c > @@ -109,6 +109,8 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct > route *ro, int flags, > int rv; > #endif > > + SOCKET_ASSERT_LOCKED(); > + > #ifdef IPSEC > if (inp && (inp->inp_flags & INP_IPV6) != 0) > panic("ip_output: IPv6 pcb is passed"); > diff --git sys/netinet/tcp_input.c sys/netinet/tcp_input.c > index 2d06f54..8668f15 100644 > --- sys/netinet/tcp_input.c > +++ sys/netinet/tcp_input.c > @@ -3522,11 +3522,9 @@ syn_cache_timer(void *arg) > struct syn_cache *sc = arg; > int s; > > - s = splsoftnet(); > - if (sc->sc_flags & SCF_DEAD) { > - splx(s); > - return; > - } > + SOCKET_LOCK(s); > + if (sc->sc_flags & SCF_DEAD) > + goto out; > > if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) { > /* Drop it -- too many retransmissions. */ > @@ -3549,14 +3547,15 @@ syn_cache_timer(void *arg) > sc->sc_rxtshift++; > SYN_CACHE_TIMER_ARM(sc); > > - splx(s); > + out: > + SOCKET_UNLOCK(s); > return; > > dropit: > tcpstat.tcps_sc_timed_out++; > syn_cache_rm(sc); > syn_cache_put(sc); > - splx(s); > + SOCKET_UNLOCK(s); > } > > void > diff --git sys/netinet/tcp_timer.c sys/netinet/tcp_timer.c > index 6f4f07e..6ef40fd 100644 > --- sys/netinet/tcp_timer.c > +++ sys/netinet/tcp_timer.c > @@ -112,15 +112,13 @@ tcp_delack(void *arg) > * for whatever reason, it will restart the delayed > * ACK callout. 
> */ > - > - s = splsoftnet(); > - if (tp->t_flags & TF_DEAD) { > - splx(s); > - return; > - } > + SOCKET_LOCK(s); > + if (tp->t_flags & TF_DEAD) > + goto out; > tp->t_flags |= TF_ACKNOW; > (void) tcp_output(tp); > - splx(s); > + out: > + SOCKET_UNLOCK(s); > } > > /* > @@ -193,11 +191,9 @@ tcp_timer_rexmt(void *arg) > uint32_t rto; > int s; > > - s = splsoftnet(); > - if (tp->t_flags & TF_DEAD) { > - splx(s); > - return; > - } > + SOCKET_LOCK(s); > + if (tp->t_flags & TF_DEAD) > + goto out; > > if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb && > SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) && > @@ -224,8 +220,7 @@ tcp_timer_rexmt(void *arg) > sin.sin_addr = tp->t_inpcb->inp_faddr; > in_pcbnotifyall(&tcbtable, sintosa(&sin), > tp->t_inpcb->inp_rtableid, EMSGSIZE, tcp_mtudisc); > - splx(s); > - return; > + goto out; > } > > #ifdef TCP_SACK > @@ -376,7 +371,7 @@ tcp_timer_rexmt(void *arg) > (void) tcp_output(tp); > > out: > - splx(s); > + SOCKET_UNLOCK(s); > } > > void > @@ -386,11 +381,10 @@ tcp_timer_persist(void *arg) > uint32_t rto; > int s; > > - s = splsoftnet(); > + SOCKET_LOCK(s); > if ((tp->t_flags & TF_DEAD) || > TCP_TIMER_ISARMED(tp, TCPT_REXMT)) { > - splx(s); > - return; > + goto out; > } > tcpstat.tcps_persisttimeo++; > /* > @@ -415,7 +409,7 @@ tcp_timer_persist(void *arg) > (void) tcp_output(tp); > tp->t_force = 0; > out: > - splx(s); > + SOCKET_UNLOCK(s); > } > > void > @@ -424,11 +418,9 @@ tcp_timer_keep(void *arg) > struct tcpcb *tp = arg; > int s; > > - s = splsoftnet(); > - if (tp->t_flags & TF_DEAD) { > - splx(s); > - return; > - } > + SOCKET_LOCK(s); > + if (tp->t_flags & TF_DEAD) > + goto out; > > tcpstat.tcps_keeptimeo++; > if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) > @@ -457,15 +449,14 @@ tcp_timer_keep(void *arg) > TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl); > } else > TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); > - > - splx(s); > + out: > + SOCKET_UNLOCK(s); > return; > > dropit: > tcpstat.tcps_keepdrops++; > tp = tcp_drop(tp, ETIMEDOUT); > 
- > - splx(s); > + SOCKET_UNLOCK(s); > } > > void > @@ -474,11 +465,9 @@ tcp_timer_2msl(void *arg) > struct tcpcb *tp = arg; > int s; > > - s = splsoftnet(); > - if (tp->t_flags & TF_DEAD) { > - splx(s); > - return; > - } > + SOCKET_LOCK(s); > + if (tp->t_flags & TF_DEAD) > + goto out; > > #ifdef TCP_SACK > tcp_timer_freesack(tp); > @@ -490,5 +479,6 @@ tcp_timer_2msl(void *arg) > else > tp = tcp_close(tp); > > - splx(s); > + out: > + SOCKET_UNLOCK(s); > } > diff --git sys/netinet6/icmp6.c sys/netinet6/icmp6.c > index c918004..2abbc12 100644 > --- sys/netinet6/icmp6.c > +++ sys/netinet6/icmp6.c > @@ -1914,17 +1914,14 @@ icmp6_mtudisc_clone(struct sockaddr *dst, u_int > rdomain) > if ((rt->rt_flags & RTF_HOST) == 0) { > struct rt_addrinfo info; > struct rtentry *nrt; > - int s; > > bzero(&info, sizeof(info)); > info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC; > info.rti_info[RTAX_DST] = dst; > info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; > > - s = splsoftnet(); > error = rtrequest(RTM_ADD, &info, rt->rt_priority, &nrt, > rdomain); > - splx(s); > if (error) { > rtfree(rt); > return NULL; > @@ -1947,16 +1944,15 @@ void > icmp6_mtudisc_timeout(struct rtentry *rt, struct rttimer *r) > { > struct ifnet *ifp; > - int s; > + > + SOCKET_ASSERT_LOCKED(); > > ifp = if_get(rt->rt_ifidx); > if (ifp == NULL) > return; > > if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) { > - s = splsoftnet(); > rtdeletemsg(rt, ifp, r->rtt_tableid); > - splx(s); > } else { > if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) > rt->rt_rmx.rmx_mtu = 0; > @@ -1969,17 +1965,15 @@ void > icmp6_redirect_timeout(struct rtentry *rt, struct rttimer *r) > { > struct ifnet *ifp; > - int s; > + > + SOCKET_ASSERT_LOCKED(); > > ifp = if_get(rt->rt_ifidx); > if (ifp == NULL) > return; > > - if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) { > - s = splsoftnet(); > + if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) > rtdeletemsg(rt, ifp, 
r->rtt_tableid); > - splx(s); > - } > > if_put(ifp); > } > diff --git sys/netinet6/ip6_input.c sys/netinet6/ip6_input.c > index 9ac2555..aed3ebd 100644 > --- sys/netinet6/ip6_input.c > +++ sys/netinet6/ip6_input.c > @@ -1429,12 +1429,15 @@ ip6_send_dispatch(void *xmq) > int s; > > mq_delist(mq, &ml); > + if (ml_empty(&ml)) > + return; > + > KERNEL_LOCK(); > - s = splsoftnet(); > + SOCKET_LOCK(s); > while ((m = ml_dequeue(&ml)) != NULL) { > ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL); > } > - splx(s); > + SOCKET_UNLOCK(s); > KERNEL_UNLOCK(); > } > > diff --git sys/netinet6/nd6.c sys/netinet6/nd6.c > index 34c8d9c..66e6068 100644 > --- sys/netinet6/nd6.c > +++ sys/netinet6/nd6.c > @@ -308,10 +308,6 @@ skip1: > void > nd6_llinfo_settimer(struct llinfo_nd6 *ln, int secs) > { > - int s; > - > - s = splsoftnet(); > - > if (secs < 0) { > ln->ln_rt->rt_expire = 0; > timeout_del(&ln->ln_timer_ch); > @@ -319,8 +315,6 @@ nd6_llinfo_settimer(struct llinfo_nd6 *ln, int secs) > ln->ln_rt->rt_expire = time_uptime + secs; > timeout_add_sec(&ln->ln_timer_ch, secs); > } > - > - splx(s); > } > > void > @@ -333,14 +327,14 @@ nd6_llinfo_timer(void *arg) > struct ifnet *ifp; > struct nd_ifinfo *ndi = NULL; > > - s = splsoftnet(); > + SOCKET_LOCK(s); > > ln = (struct llinfo_nd6 *)arg; > > if ((rt = ln->ln_rt) == NULL) > panic("ln->ln_rt == NULL"); > if ((ifp = if_get(rt->rt_ifidx)) == NULL) { > - splx(s); > + SOCKET_UNLOCK(s); > return; > } > ndi = ND_IFINFO(ifp); > @@ -427,7 +421,7 @@ nd6_llinfo_timer(void *arg) > } > > if_put(ifp); > - splx(s); > + SOCKET_UNLOCK(s); > } > > /* > @@ -989,7 +983,7 @@ nd6_rtrequest(struct ifnet *ifp, int req, struct rtentry > *rt) > nd6_inuse++; > nd6_allocated++; > ln->ln_rt = rt; > - timeout_set(&ln->ln_timer_ch, nd6_llinfo_timer, ln); > + timeout_set_proc(&ln->ln_timer_ch, nd6_llinfo_timer, ln); > /* this is required for "ndp" command. 
- shin */ > if (req == RTM_ADD) { > /* > diff --git sys/sys/systm.h sys/sys/systm.h > index 5ef388b..56d57d3 100644 > --- sys/sys/systm.h > +++ sys/sys/systm.h > @@ -290,6 +290,31 @@ struct uio; > int uiomove(void *, size_t, struct uio *); > > #if defined(_KERNEL) > +/* > + * Serialize socket operations to ensure that code paths that were > + * atomically executed stay atomic until we turn then mpsafe. > + */ > +extern struct rwlock socketlock; > + > +#define SOCKET_LOCK(s) > \ > +do { \ > + rw_enter_write(&socketlock); \ > + s = splsoftnet(); \ > +} while (/* CONSTCOND */ 0) > + > +#define SOCKET_UNLOCK(s) > \ > +do { \ > + splx(s); \ > + rw_exit_write(&socketlock); \ > +} while (/* CONSTCOND */ 0) > + > +#define SOCKET_ASSERT_LOCKED() > \ > +do { \ > + if (rw_status(&socketlock) != RW_WRITE) \ > + splassert_fail(RW_WRITE, rw_status(&socketlock), __func__);\ > + splsoftassert(IPL_SOFTNET); \ > +} while (0) > + > __returns_twice int setjmp(label_t *); > __dead void longjmp(label_t *); > #endif > >
