One of the reasons for the performance drop with the kqueue-based poll/select is the fact that kqueue filters are called up to 3 times per syscall and that they all spin on the NET_LOCK() for TCP/UDP packets.

The diff below is an RFC for improving the situation. Socket kqueue filters mainly check the amount of data available to read/write, which involves comparing various socket buffer fields (sb_cc, sb_lowat, etc.). The diff introduces a new per-sockbuf mutex to serialize updates of those fields with reads in the kqueue filters. Since these fields are always modified with the socket lock held, holding either the mutex or the solock is enough to get a coherent view of them. Note that one of these locks is only necessary when multiple fields have to be read together (like in sbspace()).
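To make that rule concrete, here is a minimal sketch (a hypothetical helper, not part of the diff below) of a reader combining two sockbuf fields: writers update the fields with both the solock and sb_mtx held, so holding either one is sufficient for a coherent read.

	/*
	 * Hypothetical reader: sb_cc and sb_lowat are written with both
	 * the solock and sb_mtx held, so taking either lock (here the
	 * mutex) yields a coherent snapshot of the pair.
	 */
	static inline int
	sb_ready_sketch(struct sockbuf *sb)
	{
		int ready;

		mtx_enter(&sb->sb_mtx);		/* or: solock(so) */
		ready = (sb->sb_cc >= sb->sb_lowat);
		mtx_leave(&sb->sb_mtx);

		return (ready);
	}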
Other per-socket fields accessed in the kqueue filters are never combined (with &&) to determine a condition. So, assuming it is fine to read register-sized fields without the socket lock, we can safely drop the lock there.

Could such a mutex also be used to serialize klist updates?

Comments?

diff --git sys/kern/uipc_socket.c sys/kern/uipc_socket.c
index dce20208828..d1cb9f4fc3b 100644
--- sys/kern/uipc_socket.c
+++ sys/kern/uipc_socket.c
@@ -181,6 +181,8 @@ socreate(int dom, struct socket **aso, int type, int proto)
 	so->so_egid = p->p_ucred->cr_gid;
 	so->so_cpid = p->p_p->ps_pid;
 	so->so_proto = prp;
+	mtx_init(&so->so_snd.sb_mtx, IPL_MPFLOOR);
+	mtx_init(&so->so_rcv.sb_mtx, IPL_MPFLOOR);
 	so->so_snd.sb_timeo_nsecs = INFSLP;
 	so->so_rcv.sb_timeo_nsecs = INFSLP;
 
@@ -276,7 +278,9 @@ sofree(struct socket *so, int s)
 		}
 	}
 #endif /* SOCKET_SPLICE */
+	mtx_enter(&so->so_snd.sb_mtx);
 	sbrelease(so, &so->so_snd);
+	mtx_leave(&so->so_snd.sb_mtx);
 	sorflush(so);
 	sounlock(so, s);
 #ifdef SOCKET_SPLICE
@@ -1019,8 +1023,10 @@ dontblock:
 				*mp = m_copym(m, 0, len, M_WAIT);
 				m->m_data += len;
 				m->m_len -= len;
+				mtx_enter(&so->so_rcv.sb_mtx);
 				so->so_rcv.sb_cc -= len;
 				so->so_rcv.sb_datacc -= len;
+				mtx_leave(&so->so_rcv.sb_mtx);
 			}
 		}
 		if (so->so_oobmark) {
@@ -1537,8 +1543,10 @@ somove(struct socket *so, int wait)
 		}
 		so->so_rcv.sb_mb->m_data += size;
 		so->so_rcv.sb_mb->m_len -= size;
+		mtx_enter(&so->so_rcv.sb_mtx);
 		so->so_rcv.sb_cc -= size;
 		so->so_rcv.sb_datacc -= size;
+		mtx_leave(&so->so_rcv.sb_mtx);
 	} else {
 		*mp = so->so_rcv.sb_mb;
 		sbfree(&so->so_rcv, *mp);
@@ -1777,30 +1785,40 @@ sosetopt(struct socket *so, int level, int optname, struct mbuf *m)
 		case SO_SNDBUF:
 			if (so->so_state & SS_CANTSENDMORE)
 				return (EINVAL);
+			mtx_enter(&so->so_snd.sb_mtx);
 			if (sbcheckreserve(cnt, so->so_snd.sb_wat) ||
 			    sbreserve(so, &so->so_snd, cnt))
-				return (ENOBUFS);
-			so->so_snd.sb_wat = cnt;
+				error = ENOBUFS;
+			if (error == 0)
+				so->so_snd.sb_wat = cnt;
+			mtx_leave(&so->so_snd.sb_mtx);
 			break;
 
 		case SO_RCVBUF:
 			if (so->so_state & SS_CANTRCVMORE)
 				return (EINVAL);
+			mtx_enter(&so->so_rcv.sb_mtx);
 			if (sbcheckreserve(cnt, so->so_rcv.sb_wat) ||
 			    sbreserve(so, &so->so_rcv, cnt))
-				return (ENOBUFS);
-			so->so_rcv.sb_wat = cnt;
+				error = ENOBUFS;
+			if (error == 0)
+				so->so_rcv.sb_wat = cnt;
+			mtx_leave(&so->so_rcv.sb_mtx);
 			break;
 
 		case SO_SNDLOWAT:
+			mtx_enter(&so->so_snd.sb_mtx);
 			so->so_snd.sb_lowat =
 			    (cnt > so->so_snd.sb_hiwat) ?
 			    so->so_snd.sb_hiwat : cnt;
+			mtx_leave(&so->so_snd.sb_mtx);
 			break;
 		case SO_RCVLOWAT:
+			mtx_enter(&so->so_rcv.sb_mtx);
 			so->so_rcv.sb_lowat =
 			    (cnt > so->so_rcv.sb_hiwat) ?
 			    so->so_rcv.sb_hiwat : cnt;
+			mtx_leave(&so->so_rcv.sb_mtx);
 			break;
 		}
 		break;
@@ -2077,9 +2095,10 @@ filt_sordetach(struct knote *kn)
 int
 filt_soread_common(struct knote *kn, struct socket *so)
 {
+	u_int sostate = so->so_state;
 	int rv = 0;
 
-	soassertlocked(so);
+	MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
 
 	kn->kn_data = so->so_rcv.sb_cc;
 #ifdef SOCKET_SPLICE
@@ -2088,15 +2107,17 @@ filt_soread_common(struct knote *kn, struct socket *so)
 	} else
 #endif /* SOCKET_SPLICE */
 	if (kn->kn_sfflags & NOTE_OOB) {
-		if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
+		u_long oobmark = so->so_oobmark;
+
+		if (oobmark || (sostate & SS_RCVATMARK)) {
 			kn->kn_fflags |= NOTE_OOB;
-			kn->kn_data -= so->so_oobmark;
+			kn->kn_data -= oobmark;
 			rv = 1;
 		}
-	} else if (so->so_state & SS_CANTRCVMORE) {
+	} else if (sostate & SS_CANTRCVMORE) {
 		kn->kn_flags |= EV_EOF;
 		if (kn->kn_flags & __EV_POLL) {
-			if (so->so_state & SS_ISDISCONNECTED)
+			if (sostate & SS_ISDISCONNECTED)
 				kn->kn_flags |= __EV_HUP;
 		}
 		kn->kn_fflags = so->so_error;
@@ -2116,20 +2137,25 @@ int
 filt_soread(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
+	int rv;
+
+	mtx_enter(&so->so_rcv.sb_mtx);
+	rv = filt_soread_common(kn, so);
+	mtx_leave(&so->so_rcv.sb_mtx);
 
-	return (filt_soread_common(kn, so));
+	return (rv);
 }
 
 int
 filt_soreadmodify(struct kevent *kev, struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int rv, s;
+	int rv;
 
-	s = solock(so);
+	mtx_enter(&so->so_rcv.sb_mtx);
 	knote_modify(kev, kn);
 	rv = filt_soread_common(kn, so);
-	sounlock(so, s);
+	mtx_leave(&so->so_rcv.sb_mtx);
 
 	return (rv);
 }
 
@@ -2138,16 +2164,16 @@ int
 filt_soreadprocess(struct knote *kn, struct kevent *kev)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int rv, s;
+	int rv;
 
-	s = solock(so);
+	mtx_enter(&so->so_rcv.sb_mtx);
 	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
 		rv = 1;
 	else
 		rv = filt_soread_common(kn, so);
 	if (rv != 0)
 		knote_submit(kn, kev);
-	sounlock(so, s);
+	mtx_leave(&so->so_rcv.sb_mtx);
 
 	return (rv);
 }
 
@@ -2165,22 +2191,23 @@ filt_sowdetach(struct knote *kn)
 int
 filt_sowrite_common(struct knote *kn, struct socket *so)
 {
+	u_int sostate = so->so_state;
 	int rv;
 
-	soassertlocked(so);
+	MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx);
 
-	kn->kn_data = sbspace(so, &so->so_snd);
-	if (so->so_state & SS_CANTSENDMORE) {
+	kn->kn_data = sbspace_locked(so, &so->so_snd);
+	if (sostate & SS_CANTSENDMORE) {
 		kn->kn_flags |= EV_EOF;
 		if (kn->kn_flags & __EV_POLL) {
-			if (so->so_state & SS_ISDISCONNECTED)
+			if (sostate & SS_ISDISCONNECTED)
 				kn->kn_flags |= __EV_HUP;
 		}
 		kn->kn_fflags = so->so_error;
 		rv = 1;
 	} else if (so->so_error) {	/* temporary udp error */
 		rv = 1;
-	} else if (((so->so_state & SS_ISCONNECTED) == 0) &&
+	} else if (((sostate & SS_ISCONNECTED) == 0) &&
 	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
 		rv = 0;
 	} else if (kn->kn_sfflags & NOTE_LOWAT) {
@@ -2196,20 +2223,25 @@ int
 filt_sowrite(struct knote *kn, long hint)
 {
 	struct socket *so = kn->kn_fp->f_data;
+	int rv;
+
+	mtx_enter(&so->so_snd.sb_mtx);
+	rv = filt_sowrite_common(kn, so);
+	mtx_leave(&so->so_snd.sb_mtx);
 
-	return (filt_sowrite_common(kn, so));
+	return (rv);
 }
 
 int
 filt_sowritemodify(struct kevent *kev, struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int rv, s;
+	int rv;
 
-	s = solock(so);
+	mtx_enter(&so->so_snd.sb_mtx);
 	knote_modify(kev, kn);
 	rv = filt_sowrite_common(kn, so);
-	sounlock(so, s);
+	mtx_leave(&so->so_snd.sb_mtx);
 
 	return (rv);
 }
 
@@ -2218,16 +2250,16 @@ int
 filt_sowriteprocess(struct knote *kn, struct kevent *kev)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int rv, s;
+	int rv;
 
-	s = solock(so);
+	mtx_enter(&so->so_snd.sb_mtx);
 	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
 		rv = 1;
 	else
 		rv = filt_sowrite_common(kn, so);
 	if (rv != 0)
 		knote_submit(kn, kev);
-	sounlock(so, s);
+	mtx_leave(&so->so_snd.sb_mtx);
 
 	return (rv);
 }
 
@@ -2235,8 +2267,6 @@ filt_sowriteprocess(struct knote *kn, struct kevent *kev)
 int
 filt_solisten_common(struct knote *kn, struct socket *so)
 {
-	soassertlocked(so);
-
 	kn->kn_data = so->so_qlen;
 
 	return (kn->kn_data != 0);
@@ -2254,12 +2284,12 @@ int
 filt_solistenmodify(struct kevent *kev, struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int rv, s;
+	int rv;
 
-	s = solock(so);
+	mtx_enter(&so->so_rcv.sb_mtx);
 	knote_modify(kev, kn);
 	rv = filt_solisten_common(kn, so);
-	sounlock(so, s);
+	mtx_leave(&so->so_rcv.sb_mtx);
 
 	return (rv);
 }
 
@@ -2268,16 +2298,16 @@ int
 filt_solistenprocess(struct knote *kn, struct kevent *kev)
 {
 	struct socket *so = kn->kn_fp->f_data;
-	int rv, s;
+	int rv;
 
-	s = solock(so);
+	mtx_enter(&so->so_rcv.sb_mtx);
 	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
 		rv = 1;
 	else
 		rv = filt_solisten_common(kn, so);
 	if (rv != 0)
 		knote_submit(kn, kev);
-	sounlock(so, s);
+	mtx_leave(&so->so_rcv.sb_mtx);
 
 	return (rv);
 }
diff --git sys/kern/uipc_socket2.c sys/kern/uipc_socket2.c
index 69ec14230e6..b2f25ab8d40 100644
--- sys/kern/uipc_socket2.c
+++ sys/kern/uipc_socket2.c
@@ -34,7 +34,6 @@
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/domain.h>
@@ -163,6 +162,8 @@ sonewconn(struct socket *head, int connstatus)
 	if (so == NULL)
 		return (NULL);
 	rw_init(&so->so_lock, "solock");
+	mtx_init(&so->so_snd.sb_mtx, IPL_MPFLOOR);
+	mtx_init(&so->so_rcv.sb_mtx, IPL_MPFLOOR);
 	so->so_type = head->so_type;
 	so->so_options = head->so_options &~ SO_ACCEPTCONN;
 	so->so_linger = head->so_linger;
@@ -463,8 +464,10 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
 {
 	soassertlocked(so);
 
+	mtx_enter(&so->so_snd.sb_mtx);
 	if (sbreserve(so, &so->so_snd, sndcc))
 		goto bad;
+	mtx_enter(&so->so_rcv.sb_mtx);
 	if (sbreserve(so, &so->so_rcv, rcvcc))
 		goto bad2;
 	so->so_snd.sb_wat = sndcc;
@@ -475,10 +478,14 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
 		so->so_snd.sb_lowat = MCLBYTES;
 	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
 		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
+	mtx_leave(&so->so_rcv.sb_mtx);
+	mtx_leave(&so->so_snd.sb_mtx);
 	return (0);
 bad2:
+	mtx_leave(&so->so_rcv.sb_mtx);
 	sbrelease(so, &so->so_snd);
 bad:
+	mtx_leave(&so->so_snd.sb_mtx);
 	return (ENOBUFS);
 }
 
@@ -492,6 +499,7 @@ sbreserve(struct socket *so, struct sockbuf *sb, u_long cc)
 {
 	KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
 	soassertlocked(so);
+	MUTEX_ASSERT_LOCKED(&sb->sb_mtx);
 
 	if (cc == 0 || cc > sb_max)
 		return (1);
@@ -533,6 +541,7 @@ sbchecklowmem(void)
 void
 sbrelease(struct socket *so, struct sockbuf *sb)
 {
+	MUTEX_ASSERT_LOCKED(&sb->sb_mtx);
 
 	sbflush(so, sb);
 	sb->sb_hiwat = sb->sb_mbmax = 0;
@@ -686,6 +695,7 @@ sbcheck(struct sockbuf *sb)
 	struct mbuf *m, *n;
 	u_long len = 0, mbcnt = 0;
 
+	mtx_enter(&sb->sb_mtx);
 	for (m = sb->sb_mb; m; m = m->m_nextpkt) {
 		for (n = m; n; n = n->m_next) {
 			len += n->m_len;
@@ -701,6 +711,7 @@ sbcheck(struct sockbuf *sb)
 		    mbcnt, sb->sb_mbcnt);
 		panic("sbcheck");
 	}
+	mtx_leave(&sb->sb_mtx);
 }
 #endif
 
@@ -908,9 +919,11 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
 			memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t),
 			    m->m_len);
 			n->m_len += m->m_len;
+			mtx_enter(&sb->sb_mtx);
 			sb->sb_cc += m->m_len;
 			if (m->m_type != MT_CONTROL &&
 			    m->m_type != MT_SONAME)
 				sb->sb_datacc += m->m_len;
+			mtx_leave(&sb->sb_mtx);
 			m = m_free(m);
 			continue;
 		}
@@ -943,6 +956,7 @@ sbflush(struct socket *so, struct sockbuf *sb)
 {
 	KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
 	KASSERT((sb->sb_flags & SB_LOCK) == 0);
+	MUTEX_ASSERT_LOCKED(&sb->sb_mtx);
 
 	while (sb->sb_mbcnt)
 		sbdrop(so, sb, (int)sb->sb_cc);
@@ -965,6 +979,7 @@ sbdrop(struct socket *so, struct sockbuf *sb, int len)
 
 	KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
 	soassertlocked(so);
+	MUTEX_ASSERT_LOCKED(&sb->sb_mtx);
 
 	next = (m = sb->sb_mb) ? m->m_nextpkt : NULL;
 	while (len > 0) {
@@ -984,12 +999,12 @@ sbdrop(struct socket *so, struct sockbuf *sb, int len)
 			break;
 		}
 		len -= m->m_len;
-		sbfree(sb, m);
+		sbfree_locked(sb, m);
 		mn = m_free(m);
 		m = mn;
 	}
 	while (m && m->m_len == 0) {
-		sbfree(sb, m);
+		sbfree_locked(sb, m);
 		mn = m_free(m);
 		m = mn;
 	}
diff --git sys/kern/uipc_usrreq.c sys/kern/uipc_usrreq.c
index 83e62bad6c1..73f59e61160 100644
--- sys/kern/uipc_usrreq.c
+++ sys/kern/uipc_usrreq.c
@@ -208,8 +208,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
 		 * Adjust backpressure on sender
 		 * and wakeup any waiting to write.
 		 */
+		mtx_enter(&so2->so_snd.sb_mtx);
 		so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
 		so2->so_snd.sb_cc = so->so_rcv.sb_cc;
+		mtx_leave(&so2->so_snd.sb_mtx);
 		sowwakeup(so2);
 		break;
 
@@ -284,8 +286,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
 			sbappendrecord(so2, &so2->so_rcv, m);
 		else
 			sbappend(so2, &so2->so_rcv, m);
+		mtx_enter(&so->so_snd.sb_mtx);
 		so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
 		so->so_snd.sb_cc = so2->so_rcv.sb_cc;
+		mtx_leave(&so->so_snd.sb_mtx);
 		if (so2->so_rcv.sb_cc > 0)
 			sorwakeup(so2);
 		m = NULL;
@@ -736,12 +740,16 @@ unp_disconnect(struct unpcb *unp)
 
 	case SOCK_STREAM:
 	case SOCK_SEQPACKET:
+		mtx_enter(&unp->unp_socket->so_snd.sb_mtx);
 		unp->unp_socket->so_snd.sb_mbcnt = 0;
 		unp->unp_socket->so_snd.sb_cc = 0;
+		mtx_leave(&unp->unp_socket->so_snd.sb_mtx);
 		soisdisconnected(unp->unp_socket);
 		unp2->unp_conn = NULL;
+		mtx_enter(&unp2->unp_socket->so_snd.sb_mtx);
 		unp2->unp_socket->so_snd.sb_mbcnt = 0;
 		unp2->unp_socket->so_snd.sb_cc = 0;
+		mtx_leave(&unp2->unp_socket->so_snd.sb_mtx);
 		soisdisconnected(unp2->unp_socket);
 		break;
 	}
diff --git sys/netinet/tcp_input.c sys/netinet/tcp_input.c
index cd0c12dcd3b..c60bddc9f55 100644
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -946,7 +946,9 @@ findpcb:
 			tcpstat_pkt(tcps_rcvackpack, tcps_rcvackbyte, acked);
 			ND6_HINT(tp);
+			mtx_enter(&so->so_snd.sb_mtx);
 			sbdrop(so, &so->so_snd, acked);
+			mtx_leave(&so->so_snd.sb_mtx);
 
 			/*
 			 * If we had a pending ICMP message that
@@ -1714,6 +1716,7 @@ trimthenstep6:
 			    TCP_MAXWIN << tp->snd_scale);
 		}
 		ND6_HINT(tp);
+		mtx_enter(&so->so_snd.sb_mtx);
 		if (acked > so->so_snd.sb_cc) {
 			if (tp->snd_wnd > so->so_snd.sb_cc)
 				tp->snd_wnd -= so->so_snd.sb_cc;
@@ -1729,6 +1732,7 @@ trimthenstep6:
 			tp->snd_wnd = 0;
 			ourfinisacked = 0;
 		}
+		mtx_leave(&so->so_snd.sb_mtx);
 
 		tcp_update_sndspace(tp);
 		if (sb_notify(so, &so->so_snd)) {
@@ -2967,7 +2971,9 @@ tcp_mss_update(struct tcpcb *tp)
 		bufsize = roundup(bufsize, mss);
 		if (bufsize > sb_max)
 			bufsize = sb_max;
+		mtx_enter(&so->so_snd.sb_mtx);
 		(void)sbreserve(so, &so->so_snd, bufsize);
+		mtx_leave(&so->so_snd.sb_mtx);
 	}
 
 	bufsize = so->so_rcv.sb_hiwat;
@@ -2975,7 +2981,9 @@ tcp_mss_update(struct tcpcb *tp)
 		bufsize = roundup(bufsize, mss);
 		if (bufsize > sb_max)
 			bufsize = sb_max;
+		mtx_enter(&so->so_rcv.sb_mtx);
 		(void)sbreserve(so, &so->so_rcv, bufsize);
+		mtx_leave(&so->so_rcv.sb_mtx);
 	}
 }
diff --git sys/netinet/tcp_usrreq.c sys/netinet/tcp_usrreq.c
index 98d2270d8f4..4652e6a25ce 100644
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -688,7 +688,9 @@ tcp_disconnect(struct tcpcb *tp)
 		tp = tcp_drop(tp, 0);
 	else {
 		soisdisconnecting(so);
+		mtx_enter(&so->so_rcv.sb_mtx);
 		sbflush(so, &so->so_rcv);
+		mtx_leave(&so->so_rcv.sb_mtx);
 		tp = tcp_usrclosed(tp);
 		if (tp)
 			(void) tcp_output(tp);
@@ -1115,6 +1117,7 @@ tcp_update_sndspace(struct tcpcb *tp)
 	struct socket *so = tp->t_inpcb->inp_socket;
 	u_long nmax = so->so_snd.sb_hiwat;
 
+	mtx_enter(&so->so_snd.sb_mtx);
 	if (sbchecklowmem()) {
 		/* low on memory try to get rid of some */
 		if (tcp_sendspace < nmax)
@@ -1128,7 +1131,7 @@ tcp_update_sndspace(struct tcpcb *tp)
 		    tp->snd_una);
 
 	/* a writable socket must be preserved because of poll(2) semantics */
-	if (sbspace(so, &so->so_snd) >= so->so_snd.sb_lowat) {
+	if (sbspace_locked(so, &so->so_snd) >= so->so_snd.sb_lowat) {
 		if (nmax < so->so_snd.sb_cc + so->so_snd.sb_lowat)
 			nmax = so->so_snd.sb_cc + so->so_snd.sb_lowat;
 		/* keep in sync with sbreserve() calculation */
@@ -1141,6 +1144,7 @@ tcp_update_sndspace(struct tcpcb *tp)
 
 	if (nmax != so->so_snd.sb_hiwat)
 		sbreserve(so, &so->so_snd, nmax);
+	mtx_leave(&so->so_snd.sb_mtx);
 }
 
 /*
@@ -1179,5 +1183,7 @@ tcp_update_rcvspace(struct tcpcb *tp)
 
 	/* round to MSS boundary */
 	nmax = roundup(nmax, tp->t_maxseg);
+	mtx_enter(&so->so_rcv.sb_mtx);
 	sbreserve(so, &so->so_rcv, nmax);
+	mtx_leave(&so->so_rcv.sb_mtx);
 }
diff --git sys/sys/socketvar.h sys/sys/socketvar.h
index 72bb18d3e59..32cab70c9bd 100644
--- sys/sys/socketvar.h
+++ sys/sys/socketvar.h
@@ -33,10 +33,12 @@
  */
 
 #include <sys/selinfo.h>		/* for struct selinfo */
+#include <sys/systm.h>			/* panicstr for MUTEX_ASSERT */
 #include <sys/queue.h>
 #include <sys/sigio.h>			/* for struct sigio_ref */
 #include <sys/task.h>
 #include <sys/timeout.h>
+#include <sys/mutex.h>
 #include <sys/rwlock.h>
 
 #ifndef	_SOCKLEN_T_DEFINED_
@@ -51,6 +53,10 @@ TAILQ_HEAD(soqhead, socket);
 * Contains send and receive buffer queues,
 * handle on protocol and pointer to protocol
 * private data and error information.
+ *
+ * Locks used to protect struct members in this file:
+ *	s	this socket solock
+ *	m	per-sockbuf mutex
 */
 struct socket {
 	const struct protosw *so_proto;	/* protocol handle */
@@ -99,15 +105,16 @@ struct socket {
 * Variables for socket buffering.
 */
 struct sockbuf {
+	struct mutex sb_mtx;
/* The following fields are all zeroed on flush. */
 #define	sb_startzero	sb_cc
-	u_long	sb_cc;		/* actual chars in buffer */
-	u_long	sb_datacc;	/* data only chars in buffer */
-	u_long	sb_hiwat;	/* max actual char count */
-	u_long	sb_wat;		/* default watermark */
-	u_long	sb_mbcnt;	/* chars of mbufs used */
-	u_long	sb_mbmax;	/* max chars of mbufs to use */
-	long	sb_lowat;	/* low water mark */
+	u_long	sb_cc;		/* [s|m] actual chars in buffer */
+	u_long	sb_datacc;	/* [s|m] data only chars in buffer */
+	u_long	sb_hiwat;	/* [s|m] max actual char count */
+	u_long	sb_wat;		/* [s|m] default watermark */
+	u_long	sb_mbcnt;	/* [s|m] chars of mbufs used */
+	u_long	sb_mbmax;	/* [s|m] max chars of mbufs to use */
+	long	sb_lowat;	/* [s|m] low water mark */
 	struct mbuf *sb_mb;	/* the mbuf chain */
 	struct mbuf *sb_mbtail;	/* the last mbuf in the chain */
 	struct mbuf *sb_lastrecord;/* first mbuf of last record in
@@ -189,13 +196,27 @@ sb_notify(struct socket *so, struct sockbuf *sb)
 * overflow and return 0.
 */
 static inline long
-sbspace(struct socket *so, struct sockbuf *sb)
+sbspace_locked(struct socket *so, struct sockbuf *sb)
 {
 	KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
-	soassertlocked(so);
+	MUTEX_ASSERT_LOCKED(&sb->sb_mtx);
+
 	return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt);
 }
+static inline long
+sbspace(struct socket *so, struct sockbuf *sb)
+{
+	long space;
+
+	mtx_enter(&sb->sb_mtx);
+	space = sbspace_locked(so, sb);
+	mtx_leave(&sb->sb_mtx);
+
+	return space;
+}
+
+
 /* do we have to send all at once on a socket? */
 #define	sosendallatonce(so)	\
     ((so)->so_proto->pr_flags & PR_ATOMIC)
@@ -224,16 +245,19 @@ soreadable(struct socket *so)
 
 /* adjust counters in sb reflecting allocation of m */
 #define	sballoc(sb, m) do {						\
+	mtx_enter(&(sb)->sb_mtx);					\
 	(sb)->sb_cc += (m)->m_len;					\
 	if ((m)->m_type != MT_CONTROL && (m)->m_type != MT_SONAME)	\
 		(sb)->sb_datacc += (m)->m_len;				\
 	(sb)->sb_mbcnt += MSIZE;					\
 	if ((m)->m_flags & M_EXT)					\
 		(sb)->sb_mbcnt += (m)->m_ext.ext_size;			\
+	mtx_leave(&(sb)->sb_mtx);					\
 } while (/* CONSTCOND */ 0)
 
/* adjust counters in sb reflecting freeing of m */
-#define	sbfree(sb, m) do {						\
+#define	sbfree_locked(sb, m) do {					\
+	MUTEX_ASSERT_LOCKED(&(sb)->sb_mtx);				\
 	(sb)->sb_cc -= (m)->m_len;					\
 	if ((m)->m_type != MT_CONTROL && (m)->m_type != MT_SONAME)	\
 		(sb)->sb_datacc -= (m)->m_len;				\
@@ -242,6 +266,12 @@ soreadable(struct socket *so)
 		(sb)->sb_mbcnt -= (m)->m_ext.ext_size;			\
 } while (/* CONSTCOND */ 0)
 
+#define	sbfree(sb, m) do {						\
+	mtx_enter(&(sb)->sb_mtx);					\
+	sbfree_locked((sb), (m));					\
+	mtx_leave(&(sb)->sb_mtx);					\
+} while (/* CONSTCOND */ 0)
+
 /*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
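As a usage note, here is a hypothetical caller (not part of the diff) illustrating the intended sbspace()/sbspace_locked() split: code that already holds sb_mtx, such as the kqueue filters above, calls sbspace_locked(), while unlocked paths keep calling sbspace(), which now takes the mutex internally.

	/*
	 * Hypothetical caller: the wrapper locks internally, the _locked
	 * variant is for code paths that already hold sb_mtx and may
	 * combine it with other sb_* fields in one critical section.
	 */
	static long
	snd_space_sketch(struct socket *so)
	{
		long space;

		/* Unlocked caller: the wrapper takes sb_mtx itself. */
		space = sbspace(so, &so->so_snd);

		/* Locked caller: combine several fields coherently. */
		mtx_enter(&so->so_snd.sb_mtx);
		if (sbspace_locked(so, &so->so_snd) < so->so_snd.sb_lowat)
			space = 0;
		mtx_leave(&so->so_snd.sb_mtx);

		return (space);
	}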