On 09/06/17(Fri) 15:54, Martin Pieuchot wrote:
> On 09/06/17(Fri) 00:32, Alexander Bluhm wrote:
> > On Tue, Jun 06, 2017 at 05:15:40PM +0200, Martin Pieuchot wrote:
> > > TCP/UDP are almost ready to run without KERNEL_LOCK() because accesses
> > > to their sockets are serialized via the NET_LOCK(). On the other hand,
> > > accesses to pfkey and routing sockets still rely on the KERNEL_LOCK().
> > >
> > > Since we're going to work at the socket layer, first to remove the
> > > KERNEL_LOCK() from routing/pfkey sockets then to split the NET_LOCK(),
> > > we need some tooling to move faster and avoid mistakes.
> > >
> > > Currently all operations on socket buffers are protected by these
> > > locks. I'd like to assert that, at least for all functions used in
> > > the TCP/UDP layers.
> > >
> > > The idea is to later change the lock asserted in soassertlocked().
> > >
> > > Comments, ok?
> >
> > Good idea, mostly OK.
>
> Updated diff:
>
> - use a clever trick in sorflush() to make the new assert happy.
> - convert sbappendrecord() and sbappend() for consistency
> - fix other nits bluhm@ spotted
> - add a missing solock()/sounlock() in filt_sowrite(). Note that
> this doesn't make the whole function mp-safe.
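New version including a fix for kqueue filters: selwakeup() now passes
NOTE_SUBMIT to KNOTE(), and filt_sowrite() only calls solock()/sounlock()
itself when that hint is not set.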
Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.186
diff -u -p -r1.186 uipc_socket.c
--- kern/uipc_socket.c 31 May 2017 08:55:10 -0000 1.186
+++ kern/uipc_socket.c 19 Jun 2017 13:43:27 -0000
@@ -216,7 +216,7 @@ sofree(struct socket *so)
so->so_sp = NULL;
}
#endif /* SOCKET_SPLICE */
- sbrelease(&so->so_snd);
+ sbrelease(so, &so->so_snd);
sorflush(so);
pool_put(&socket_pool, so);
}
@@ -440,7 +440,7 @@ restart:
} else if (addr == 0)
snderr(EDESTADDRREQ);
}
- space = sbspace(&so->so_snd);
+ space = sbspace(so, &so->so_snd);
if (flags & MSG_OOB)
space += 1024;
if ((atomic && resid > so->so_snd.sb_hiwat) ||
@@ -1041,7 +1041,7 @@ sorflush(struct socket *so)
struct sockbuf *sb = &so->so_rcv;
struct protosw *pr = so->so_proto;
sa_family_t af = pr->pr_domain->dom_family;
- struct sockbuf asb;
+ struct socket aso;
sb->sb_flags |= SB_NOINTR;
sblock(sb, M_WAITOK,
@@ -1049,16 +1049,16 @@ sorflush(struct socket *so)
&netlock : NULL);
socantrcvmore(so);
sbunlock(sb);
- asb = *sb;
+ aso.so_rcv = *sb;
memset(sb, 0, sizeof (*sb));
/* XXX - the memset stomps all over so_rcv */
- if (asb.sb_flags & SB_KNOTE) {
- sb->sb_sel.si_note = asb.sb_sel.si_note;
+ if (aso.so_rcv.sb_flags & SB_KNOTE) {
+ sb->sb_sel.si_note = aso.so_rcv.sb_sel.si_note;
sb->sb_flags = SB_KNOTE;
}
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
- (*pr->pr_domain->dom_dispose)(asb.sb_mb);
- sbrelease(&asb);
+ (*pr->pr_domain->dom_dispose)(aso.so_rcv.sb_mb);
+ sbrelease(&aso, &aso.so_rcv);
}
#ifdef SOCKET_SPLICE
@@ -1157,7 +1157,7 @@ sosplice(struct socket *so, int fd, off_
so->so_idletv = *tv;
else
timerclear(&so->so_idletv);
- timeout_set(&so->so_idleto, soidle, so);
+ timeout_set_proc(&so->so_idleto, soidle, so);
task_set(&so->so_splicetask, sotask, so);
/*
@@ -1270,7 +1270,7 @@ somove(struct socket *so, int wait)
maxreached = 1;
}
}
- space = sbspace(&sosp->so_snd);
+ space = sbspace(sosp, &sosp->so_snd);
if (so->so_oobmark && so->so_oobmark < len &&
so->so_oobmark < space + 1024)
space += 1024;
@@ -1635,7 +1635,7 @@ sosetopt(struct socket *so, int level, i
goto bad;
}
if (sbcheckreserve(cnt, so->so_snd.sb_wat) ||
- sbreserve(&so->so_snd, cnt)) {
+ sbreserve(so, &so->so_snd, cnt)) {
error = ENOBUFS;
goto bad;
}
@@ -1648,7 +1648,7 @@ sosetopt(struct socket *so, int level, i
goto bad;
}
if (sbcheckreserve(cnt, so->so_rcv.sb_wat) ||
- sbreserve(&so->so_rcv, cnt)) {
+ sbreserve(so, &so->so_rcv, cnt)) {
error = ENOBUFS;
goto bad;
}
@@ -1990,8 +1990,13 @@ int
filt_sowrite(struct knote *kn, long hint)
{
struct socket *so = kn->kn_fp->f_data;
+ int s;
- kn->kn_data = sbspace(&so->so_snd);
+ if (!(hint & NOTE_SUBMIT))
+ s = solock(so);
+ kn->kn_data = sbspace(so, &so->so_snd);
+ if (!(hint & NOTE_SUBMIT))
+ sounlock(s);
if (so->so_state & SS_CANTSENDMORE) {
kn->kn_flags |= EV_EOF;
kn->kn_fflags = so->so_error;
Index: kern/uipc_usrreq.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.117
diff -u -p -r1.117 uipc_usrreq.c
--- kern/uipc_usrreq.c 13 Mar 2017 20:18:21 -0000 1.117
+++ kern/uipc_usrreq.c 19 Jun 2017 10:28:00 -0000
@@ -222,7 +222,7 @@ uipc_usrreq(struct socket *so, int req,
from = mtod(unp->unp_addr, struct sockaddr *);
else
from = &sun_noname;
- if (sbappendaddr(&so2->so_rcv, from, m, control)) {
+ if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
sorwakeup(so2);
m = NULL;
control = NULL;
@@ -252,16 +252,16 @@ uipc_usrreq(struct socket *so, int req,
* Wake up readers.
*/
if (control) {
- if (sbappendcontrol(rcv, m, control))
+ if (sbappendcontrol(so2, rcv, m, control))
control = NULL;
else {
error = ENOBUFS;
break;
}
} else if (so->so_type == SOCK_SEQPACKET)
- sbappendrecord(rcv, m);
+ sbappendrecord(so2, rcv, m);
else
- sbappend(rcv, m);
+ sbappend(so2, rcv, m);
snd->sb_mbcnt = rcv->sb_mbcnt;
snd->sb_cc = rcv->sb_cc;
sorwakeup(so2);
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.78
diff -u -p -r1.78 uipc_socket2.c
--- kern/uipc_socket2.c 7 Jun 2017 13:41:02 -0000 1.78
+++ kern/uipc_socket2.c 19 Jun 2017 10:28:00 -0000
@@ -436,9 +436,9 @@ int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{
- if (sbreserve(&so->so_snd, sndcc))
+ if (sbreserve(so, &so->so_snd, sndcc))
goto bad;
- if (sbreserve(&so->so_rcv, rcvcc))
+ if (sbreserve(so, &so->so_rcv, rcvcc))
goto bad2;
so->so_snd.sb_wat = sndcc;
so->so_rcv.sb_wat = rcvcc;
@@ -450,7 +450,7 @@ soreserve(struct socket *so, u_long sndc
so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
return (0);
bad2:
- sbrelease(&so->so_snd);
+ sbrelease(so, &so->so_snd);
bad:
return (ENOBUFS);
}
@@ -461,8 +461,10 @@ bad:
* if buffering efficiency is near the normal case.
*/
int
-sbreserve(struct sockbuf *sb, u_long cc)
+sbreserve(struct socket *so, struct sockbuf *sb, u_long cc)
{
+ KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
+ soassertlocked(so);
if (cc == 0 || cc > sb_max)
return (1);
@@ -503,10 +505,10 @@ sbchecklowmem(void)
* Free mbufs held by a socket, and reserved mbuf space.
*/
void
-sbrelease(struct sockbuf *sb)
+sbrelease(struct socket *so, struct sockbuf *sb)
{
- sbflush(sb);
+ sbflush(so, sb);
sb->sb_hiwat = sb->sb_mbmax = 0;
}
@@ -597,7 +599,7 @@ do {
\
* discarded and mbufs are compacted where possible.
*/
void
-sbappend(struct sockbuf *sb, struct mbuf *m)
+sbappend(struct socket *so, struct sockbuf *sb, struct mbuf *m)
{
struct mbuf *n;
@@ -614,7 +616,7 @@ sbappend(struct sockbuf *sb, struct mbuf
*/
do {
if (n->m_flags & M_EOR) {
- sbappendrecord(sb, m); /* XXXXXX!!!! */
+ sbappendrecord(so, sb, m); /* XXXXXX!!!! */
return;
}
} while (n->m_next && (n = n->m_next));
@@ -635,9 +637,10 @@ sbappend(struct sockbuf *sb, struct mbuf
* in the socket buffer, that is, a stream protocol (such as TCP).
*/
void
-sbappendstream(struct sockbuf *sb, struct mbuf *m)
+sbappendstream(struct socket *so, struct sockbuf *sb, struct mbuf *m)
{
-
+ KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
+ soassertlocked(so);
KDASSERT(m->m_nextpkt == NULL);
KASSERT(sb->sb_mb == sb->sb_lastrecord);
@@ -679,10 +682,13 @@ sbcheck(struct sockbuf *sb)
* begins a new record.
*/
void
-sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
+sbappendrecord(struct socket *so, struct sockbuf *sb, struct mbuf *m0)
{
struct mbuf *m;
+ KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
+ soassertlocked(so);
+
if (m0 == NULL)
return;
@@ -759,8 +765,8 @@ sbinsertoob(struct sockbuf *sb, struct m
* Returns 0 if no space in sockbuf or insufficient mbufs.
*/
int
-sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
- struct mbuf *control)
+sbappendaddr(struct socket *so, struct sockbuf *sb, struct sockaddr *asa,
+ struct mbuf *m0, struct mbuf *control)
{
struct mbuf *m, *n, *nlast;
int space = asa->sa_len;
@@ -774,7 +780,7 @@ sbappendaddr(struct sockbuf *sb, struct
if (n->m_next == NULL) /* keep pointer to last control buf */
break;
}
- if (space > sbspace(sb))
+ if (space > sbspace(so, sb))
return (0);
if (asa->sa_len > MLEN)
return (0);
@@ -806,7 +812,8 @@ sbappendaddr(struct sockbuf *sb, struct
}
int
-sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
+sbappendcontrol(struct socket *so, struct sockbuf *sb, struct mbuf *m0,
+ struct mbuf *control)
{
struct mbuf *m, *mlast, *n;
int space = 0;
@@ -821,7 +828,7 @@ sbappendcontrol(struct sockbuf *sb, stru
n = m; /* save pointer to last control buffer */
for (m = m0; m; m = m->m_next)
space += m->m_len;
- if (space > sbspace(sb))
+ if (space > sbspace(so, sb))
return (0);
n->m_next = m0; /* concatenate data to control */
@@ -902,13 +909,13 @@ sbcompress(struct sockbuf *sb, struct mb
* Check that all resources are reclaimed.
*/
void
-sbflush(struct sockbuf *sb)
+sbflush(struct socket *so, struct sockbuf *sb)
{
-
+ KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
KASSERT((sb->sb_flags & SB_LOCK) == 0);
while (sb->sb_mbcnt)
- sbdrop(sb, (int)sb->sb_cc);
+ sbdrop(so, sb, (int)sb->sb_cc);
KASSERT(sb->sb_cc == 0);
KASSERT(sb->sb_datacc == 0);
@@ -921,10 +928,13 @@ sbflush(struct sockbuf *sb)
* Drop data from (the front of) a sockbuf.
*/
void
-sbdrop(struct sockbuf *sb, int len)
+sbdrop(struct socket *so, struct sockbuf *sb, int len)
{
struct mbuf *m, *mn;
struct mbuf *next;
+
+ KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
+ soassertlocked(so);
next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
while (len > 0) {
Index: kern/sys_generic.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_generic.c,v
retrieving revision 1.114
diff -u -p -r1.114 sys_generic.c
--- kern/sys_generic.c 24 Jan 2017 00:58:55 -0000 1.114
+++ kern/sys_generic.c 19 Jun 2017 13:37:50 -0000
@@ -799,7 +799,7 @@ selwakeup(struct selinfo *sip)
struct proc *p;
int s;
- KNOTE(&sip->si_note, 0);
+ KNOTE(&sip->si_note, NOTE_SUBMIT);
if (sip->si_seltid == 0)
return;
if (sip->si_flags & SI_COLL) {
Index: miscfs/fifofs/fifo_vnops.c
===================================================================
RCS file: /cvs/src/sys/miscfs/fifofs/fifo_vnops.c,v
retrieving revision 1.53
diff -u -p -r1.53 fifo_vnops.c
--- miscfs/fifofs/fifo_vnops.c 19 Dec 2016 08:36:49 -0000 1.53
+++ miscfs/fifofs/fifo_vnops.c 19 Jun 2017 10:28:00 -0000
@@ -552,7 +552,7 @@ filt_fifowrite(struct knote *kn, long hi
{
struct socket *so = (struct socket *)kn->kn_hook;
- kn->kn_data = sbspace(&so->so_snd);
+ kn->kn_data = sbspace(so, &so->so_snd);
if (so->so_state & SS_CANTSENDMORE) {
kn->kn_flags |= EV_EOF;
return (1);
Index: net/pfkeyv2.c
===================================================================
RCS file: /cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.160
diff -u -p -r1.160 pfkeyv2.c
--- net/pfkeyv2.c 29 May 2017 20:31:12 -0000 1.160
+++ net/pfkeyv2.c 19 Jun 2017 10:28:00 -0000
@@ -327,7 +327,7 @@ ret:
}
int
-pfkey_sendup(struct socket *socket, struct mbuf *packet, int more)
+pfkey_sendup(struct socket *so, struct mbuf *packet, int more)
{
struct mbuf *packet2;
@@ -339,12 +339,12 @@ pfkey_sendup(struct socket *socket, stru
} else
packet2 = packet;
- if (!sbappendaddr(&socket->so_rcv, &pfkey_addr, packet2, NULL)) {
+ if (!sbappendaddr(so, &so->so_rcv, &pfkey_addr, packet2, NULL)) {
m_freem(packet2);
return (ENOBUFS);
}
- sorwakeup(socket);
+ sorwakeup(so);
return (0);
}
Index: net/rtsock.c
===================================================================
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.238
diff -u -p -r1.238 rtsock.c
--- net/rtsock.c 9 Jun 2017 12:56:43 -0000 1.238
+++ net/rtsock.c 19 Jun 2017 10:28:00 -0000
@@ -174,7 +174,7 @@ route_usrreq(struct socket *so, int req,
* empty so that we can clear the flag.
*/
if (((rop->flags & ROUTECB_FLAG_FLUSH) != 0) &&
- ((sbspace(&rp->rcb_socket->so_rcv) ==
+ ((sbspace(rp->rcb_socket, &rp->rcb_socket->so_rcv) ==
rp->rcb_socket->so_rcv.sb_hiwat)))
rop->flags &= ~ROUTECB_FLAG_FLUSH;
break;
@@ -325,7 +325,8 @@ route_senddesync(void *data)
*/
desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
if (desync_mbuf != NULL) {
- if (sbappendaddr(&rp->rcb_socket->so_rcv, &route_src,
+ struct socket *so = rp->rcb_socket;
+ if (sbappendaddr(so, &so->so_rcv, &route_src,
desync_mbuf, NULL) != 0) {
rop->flags &= ~ROUTECB_FLAG_DESYNC;
sorwakeup(rp->rcb_socket);
@@ -431,8 +432,8 @@ route_input(struct mbuf *m0, struct sock
if (last) {
struct mbuf *n;
if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
- if (sbspace(&last->so_rcv) < (2 * MSIZE) ||
- sbappendaddr(&last->so_rcv, sosrc,
+ if (sbspace(last, &last->so_rcv) < (2*MSIZE) ||
+ sbappendaddr(last, &last->so_rcv, sosrc,
n, (struct mbuf *)NULL) == 0) {
/*
* Flag socket as desync'ed and
@@ -452,8 +453,8 @@ route_input(struct mbuf *m0, struct sock
last = rp->rcb_socket;
}
if (last) {
- if (sbspace(&last->so_rcv) < (2 * MSIZE) ||
- sbappendaddr(&last->so_rcv, sosrc,
+ if (sbspace(last, &last->so_rcv) < (2 * MSIZE) ||
+ sbappendaddr(last, &last->so_rcv, sosrc,
m, (struct mbuf *)NULL) == 0) {
/* Flag socket as desync'ed and flush required */
sotoroutecb(last)->flags |=
Index: netinet/ip_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.47
diff -u -p -r1.47 ip_divert.c
--- netinet/ip_divert.c 30 May 2017 07:50:37 -0000 1.47
+++ netinet/ip_divert.c 19 Jun 2017 10:28:00 -0000
@@ -222,7 +222,7 @@ divert_packet(struct mbuf *m, int dir, u
if (inp) {
sa = inp->inp_socket;
- if (sbappendaddr(&sa->so_rcv, sintosa(&addr), m, NULL) == 0) {
+ if (sbappendaddr(sa, &sa->so_rcv, sintosa(&addr), m, NULL) ==
0) {
divstat_inc(divs_fullsock);
m_freem(m);
return (0);
Index: netinet/ip_mroute.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_mroute.c,v
retrieving revision 1.118
diff -u -p -r1.118 ip_mroute.c
--- netinet/ip_mroute.c 16 May 2017 13:09:21 -0000 1.118
+++ netinet/ip_mroute.c 19 Jun 2017 10:28:00 -0000
@@ -1037,7 +1037,7 @@ int
socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
{
if (s != NULL) {
- if (sbappendaddr(&s->so_rcv, sintosa(src), mm, NULL) != 0) {
+ if (sbappendaddr(s, &s->so_rcv, sintosa(src), mm, NULL) != 0) {
sorwakeup(s);
return (0);
}
Index: netinet/raw_ip.c
===================================================================
RCS file: /cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.99
diff -u -p -r1.99 raw_ip.c
--- netinet/raw_ip.c 17 Apr 2017 21:10:03 -0000 1.99
+++ netinet/raw_ip.c 19 Jun 2017 10:28:00 -0000
@@ -168,7 +168,8 @@ rip_input(struct mbuf **mp, int *offp, i
if (last->inp_flags & INP_CONTROLOPTS ||
last->inp_socket->so_options & SO_TIMESTAMP)
ip_savecontrol(last, &opts, ip, n);
- if (sbappendaddr(&last->inp_socket->so_rcv,
+ if (sbappendaddr(last->inp_socket,
+ &last->inp_socket->so_rcv,
sintosa(&ripsrc), n, opts) == 0) {
/* should notify about lost packet */
m_freem(n);
@@ -184,8 +185,8 @@ rip_input(struct mbuf **mp, int *offp, i
if (last->inp_flags & INP_CONTROLOPTS ||
last->inp_socket->so_options & SO_TIMESTAMP)
ip_savecontrol(last, &opts, ip, m);
- if (sbappendaddr(&last->inp_socket->so_rcv, sintosa(&ripsrc), m,
- opts) == 0) {
+ if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv,
+ sintosa(&ripsrc), m, opts) == 0) {
m_freem(m);
m_freem(opts);
} else
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.151
diff -u -p -r1.151 tcp_usrreq.c
--- netinet/tcp_usrreq.c 18 May 2017 11:38:07 -0000 1.151
+++ netinet/tcp_usrreq.c 19 Jun 2017 10:28:00 -0000
@@ -355,7 +355,7 @@ tcp_usrreq(struct socket *so, int req, s
* marker if URG set. Possibly send more data.
*/
case PRU_SEND:
- sbappendstream(&so->so_snd, m);
+ sbappendstream(so, &so->so_snd, m);
error = tcp_output(tp);
break;
@@ -389,7 +389,7 @@ tcp_usrreq(struct socket *so, int req, s
break;
case PRU_SENDOOB:
- if (sbspace(&so->so_snd) < -512) {
+ if (sbspace(so, &so->so_snd) < -512) {
m_freem(m);
error = ENOBUFS;
break;
@@ -402,7 +402,7 @@ tcp_usrreq(struct socket *so, int req, s
* of data past the urgent section.
* Otherwise, snd_up should be one lower.
*/
- sbappendstream(&so->so_snd, m);
+ sbappendstream(so, &so->so_snd, m);
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_force = 1;
error = tcp_output(tp);
@@ -662,7 +662,7 @@ tcp_disconnect(struct tcpcb *tp)
tp = tcp_drop(tp, 0);
else {
soisdisconnecting(so);
- sbflush(&so->so_rcv);
+ sbflush(so, &so->so_rcv);
tp = tcp_usrclosed(tp);
if (tp)
(void) tcp_output(tp);
@@ -1111,7 +1111,7 @@ tcp_update_sndspace(struct tcpcb *tp)
tp->snd_una);
/* a writable socket must be preserved because of poll(2) semantics */
- if (sbspace(&so->so_snd) >= so->so_snd.sb_lowat) {
+ if (sbspace(so, &so->so_snd) >= so->so_snd.sb_lowat) {
if (nmax < so->so_snd.sb_cc + so->so_snd.sb_lowat)
nmax = so->so_snd.sb_cc + so->so_snd.sb_lowat;
if (nmax * 2 < so->so_snd.sb_mbcnt + so->so_snd.sb_lowat)
@@ -1122,7 +1122,7 @@ tcp_update_sndspace(struct tcpcb *tp)
nmax = roundup(nmax, tp->t_maxseg);
if (nmax != so->so_snd.sb_hiwat)
- sbreserve(&so->so_snd, nmax);
+ sbreserve(so, &so->so_snd, nmax);
}
/*
@@ -1161,5 +1161,5 @@ tcp_update_rcvspace(struct tcpcb *tp)
/* round to MSS boundary */
nmax = roundup(nmax, tp->t_maxseg);
- sbreserve(&so->so_rcv, nmax);
+ sbreserve(so, &so->so_rcv, nmax);
}
Index: netinet/tcp_subr.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.164
diff -u -p -r1.164 tcp_subr.c
--- netinet/tcp_subr.c 18 May 2017 11:38:07 -0000 1.164
+++ netinet/tcp_subr.c 19 Jun 2017 10:28:00 -0000
@@ -305,7 +305,8 @@ tcp_respond(struct tcpcb *tp, caddr_t te
int af; /* af on wire */
if (tp) {
- win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
+ struct socket *so = tp->t_inpcb->inp_socket;
+ win = sbspace(so, &so->so_rcv);
/*
* If this is called with an unconnected
* socket/tp/pcb (tp->pf is 0), we lose.
Index: netinet/tcp_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.120
diff -u -p -r1.120 tcp_output.c
--- netinet/tcp_output.c 18 May 2017 11:38:07 -0000 1.120
+++ netinet/tcp_output.c 19 Jun 2017 10:28:00 -0000
@@ -392,7 +392,7 @@ again:
if (off + len < so->so_snd.sb_cc)
flags &= ~TH_FIN;
- win = sbspace(&so->so_rcv);
+ win = sbspace(so, &so->so_rcv);
/*
* Sender silly window avoidance. If connection is idle
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.237
diff -u -p -r1.237 udp_usrreq.c
--- netinet/udp_usrreq.c 6 May 2017 16:35:59 -0000 1.237
+++ netinet/udp_usrreq.c 19 Jun 2017 10:28:00 -0000
@@ -456,7 +456,7 @@ udp_input(struct mbuf **mp, int *offp, i
ip, n);
m_adj(n, iphlen);
- if (sbappendaddr(
+ if (sbappendaddr(last->inp_socket,
&last->inp_socket->so_rcv,
&srcsa.sa, n, opts) == 0) {
m_freem(n);
@@ -501,7 +501,7 @@ udp_input(struct mbuf **mp, int *offp, i
ip_savecontrol(last, &opts, ip, m);
m_adj(m, iphlen);
- if (sbappendaddr(&last->inp_socket->so_rcv,
+ if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv,
&srcsa.sa, m, opts) == 0) {
udpstat_inc(udps_fullsock);
goto bad;
@@ -654,7 +654,8 @@ udp_input(struct mbuf **mp, int *offp, i
iphlen += sizeof(struct udphdr);
m_adj(m, iphlen);
- if (sbappendaddr(&inp->inp_socket->so_rcv, &srcsa.sa, m, opts) == 0) {
+ if (sbappendaddr(inp->inp_socket, &inp->inp_socket->so_rcv, &srcsa.sa,
+ m, opts) == 0) {
udpstat_inc(udps_fullsock);
goto bad;
}
Index: netinet/tcp_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.345
diff -u -p -r1.345 tcp_input.c
--- netinet/tcp_input.c 18 May 2017 11:38:07 -0000 1.345
+++ netinet/tcp_input.c 19 Jun 2017 10:28:00 -0000
@@ -339,7 +339,7 @@ tcp_flush_queue(struct tcpcb *tp)
if (so->so_state & SS_CANTRCVMORE)
m_freem(q->tcpqe_m);
else
- sbappendstream(&so->so_rcv, q->tcpqe_m);
+ sbappendstream(so, &so->so_rcv, q->tcpqe_m);
pool_put(&tcpqe_pool, q);
q = nq;
} while (q != NULL && q->tcpqe_tcp->th_seq == tp->rcv_nxt);
@@ -944,7 +944,7 @@ findpcb:
tcpstat_pkt(tcps_rcvackpack, tcps_rcvackbyte,
acked);
ND6_HINT(tp);
- sbdrop(&so->so_snd, acked);
+ sbdrop(so, &so->so_snd, acked);
/*
* If we had a pending ICMP message that
@@ -996,7 +996,7 @@ findpcb:
TCP_TIMER_ARM(tp, TCPT_REXMT,
tp->t_rxtcur);
tcp_update_sndspace(tp);
- if (sb_notify(&so->so_snd)) {
+ if (sb_notify(so, &so->so_snd)) {
tp->t_flags |= TF_BLOCKOUTPUT;
sowwakeup(so);
tp->t_flags &= ~TF_BLOCKOUTPUT;
@@ -1008,7 +1008,7 @@ findpcb:
}
} else if (th->th_ack == tp->snd_una &&
TAILQ_EMPTY(&tp->t_segq) &&
- tlen <= sbspace(&so->so_rcv)) {
+ tlen <= sbspace(so, &so->so_rcv)) {
/*
* This is a pure, in-sequence data packet
* with nothing on the reassembly queue and
@@ -1043,7 +1043,7 @@ findpcb:
tp->rfbuf_cnt += tlen;
}
m_adj(m, iphlen + off);
- sbappendstream(&so->so_rcv, m);
+ sbappendstream(so, &so->so_rcv, m);
}
tp->t_flags |= TF_BLOCKOUTPUT;
sorwakeup(so);
@@ -1067,7 +1067,7 @@ findpcb:
*/
{ int win;
- win = sbspace(&so->so_rcv);
+ win = sbspace(so, &so->so_rcv);
if (win < 0)
win = 0;
tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
@@ -1780,16 +1780,16 @@ trimthenstep6:
ND6_HINT(tp);
if (acked > so->so_snd.sb_cc) {
tp->snd_wnd -= so->so_snd.sb_cc;
- sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
+ sbdrop(so, &so->so_snd, (int)so->so_snd.sb_cc);
ourfinisacked = 1;
} else {
- sbdrop(&so->so_snd, acked);
+ sbdrop(so, &so->so_snd, acked);
tp->snd_wnd -= acked;
ourfinisacked = 0;
}
tcp_update_sndspace(tp);
- if (sb_notify(&so->so_snd)) {
+ if (sb_notify(so, &so->so_snd)) {
tp->t_flags |= TF_BLOCKOUTPUT;
sowwakeup(so);
tp->t_flags &= ~TF_BLOCKOUTPUT;
@@ -1997,7 +1997,7 @@ dodata:
/* XXX */
m_freem(m);
else {
m_adj(m, hdroptlen);
- sbappendstream(&so->so_rcv, m);
+ sbappendstream(so, &so->so_rcv, m);
}
tp->t_flags |= TF_BLOCKOUTPUT;
sorwakeup(so);
@@ -3107,7 +3107,7 @@ tcp_mss_update(struct tcpcb *tp)
bufsize = roundup(bufsize, mss);
if (bufsize > sb_max)
bufsize = sb_max;
- (void)sbreserve(&so->so_snd, bufsize);
+ (void)sbreserve(so, &so->so_snd, bufsize);
}
bufsize = so->so_rcv.sb_hiwat;
@@ -3115,7 +3115,7 @@ tcp_mss_update(struct tcpcb *tp)
bufsize = roundup(bufsize, mss);
if (bufsize > sb_max)
bufsize = sb_max;
- (void)sbreserve(&so->so_rcv, bufsize);
+ (void)sbreserve(so, &so->so_rcv, bufsize);
}
}
@@ -3909,7 +3909,7 @@ syn_cache_add(struct sockaddr *src, stru
/*
* Initialize some local state.
*/
- win = sbspace(&so->so_rcv);
+ win = sbspace(so, &so->so_rcv);
if (win > TCP_MAXWIN)
win = TCP_MAXWIN;
Index: netinet6/ip6_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v
retrieving revision 1.47
diff -u -p -r1.47 ip6_divert.c
--- netinet6/ip6_divert.c 30 May 2017 07:50:37 -0000 1.47
+++ netinet6/ip6_divert.c 19 Jun 2017 10:28:00 -0000
@@ -223,7 +223,7 @@ divert6_packet(struct mbuf *m, int dir,
if (inp) {
sa = inp->inp_socket;
- if (sbappendaddr(&sa->so_rcv, sin6tosa(&addr), m, NULL) == 0) {
+ if (sbappendaddr(sa, &sa->so_rcv, sin6tosa(&addr), m, NULL) ==
0) {
div6stat_inc(div6s_fullsock);
m_freem(m);
return (0);
Index: netinet6/ip6_mroute.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_mroute.c,v
retrieving revision 1.113
diff -u -p -r1.113 ip6_mroute.c
--- netinet6/ip6_mroute.c 17 May 2017 13:25:27 -0000 1.113
+++ netinet6/ip6_mroute.c 19 Jun 2017 10:28:00 -0000
@@ -832,7 +832,7 @@ int
socket6_send(struct socket *s, struct mbuf *mm, struct sockaddr_in6 *src)
{
if (s) {
- if (sbappendaddr(&s->so_rcv, sin6tosa(src), mm, NULL) != 0) {
+ if (sbappendaddr(s, &s->so_rcv, sin6tosa(src), mm, NULL) != 0) {
sorwakeup(s);
return 0;
}
Index: netinet6/raw_ip6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v
retrieving revision 1.114
diff -u -p -r1.114 raw_ip6.c
--- netinet6/raw_ip6.c 13 May 2017 17:44:00 -0000 1.114
+++ netinet6/raw_ip6.c 19 Jun 2017 10:28:00 -0000
@@ -191,7 +191,8 @@ rip6_input(struct mbuf **mp, int *offp,
ip6_savecontrol(last, n, &opts);
/* strip intermediate headers */
m_adj(n, *offp);
- if (sbappendaddr(&last->inp_socket->so_rcv,
+ if (sbappendaddr(last->inp_socket,
+ &last->inp_socket->so_rcv,
sin6tosa(&rip6src), n, opts) == 0) {
/* should notify about lost packet */
m_freem(n);
@@ -209,7 +210,7 @@ rip6_input(struct mbuf **mp, int *offp,
ip6_savecontrol(last, m, &opts);
/* strip intermediate headers */
m_adj(m, *offp);
- if (sbappendaddr(&last->inp_socket->so_rcv,
+ if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv,
sin6tosa(&rip6src), m, opts) == 0) {
m_freem(m);
m_freem(opts);
Index: nfs/nfs_socket.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_socket.c,v
retrieving revision 1.116
diff -u -p -r1.116 nfs_socket.c
--- nfs/nfs_socket.c 17 May 2017 08:59:05 -0000 1.116
+++ nfs/nfs_socket.c 19 Jun 2017 10:28:00 -0000
@@ -1179,7 +1179,7 @@ nfs_timer(void *arg)
* Set r_rtt to -1 in case we fail to send it now.
*/
rep->r_rtt = -1;
- if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
+ if (sbspace(so, &so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
(rep->r_flags & R_SENT) ||
nmp->nm_sent < nmp->nm_cwnd) &&
Index: sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.69
diff -u -p -r1.69 socketvar.h
--- sys/socketvar.h 13 Mar 2017 20:18:21 -0000 1.69
+++ sys/socketvar.h 19 Jun 2017 10:28:00 -0000
@@ -151,6 +151,11 @@ struct socket {
#define SS_DNS 0x4000 /* created using SOCK_DNS
socket(2) */
#ifdef _KERNEL
+
+#include <lib/libkern/libkern.h>
+
+void soassertlocked(struct socket *);
+
/*
* Macros for sockets and socket buffering.
*/
@@ -161,8 +166,15 @@ struct socket {
/*
* Do we need to notify the other side when I/O is possible?
*/
-#define sb_notify(sb) ((((sb)->sb_flags | (sb)->sb_flagsintr) & \
- (SB_WAIT|SB_SEL|SB_ASYNC|SB_SPLICE|SB_KNOTE)) != 0)
+static inline int
+sb_notify(struct socket *so, struct sockbuf *sb)
+{
+ int flags = (sb->sb_flags | sb->sb_flagsintr);
+
+ KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
+ soassertlocked(so);
+ return ((flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_SPLICE|SB_KNOTE)) != 0);
+}
/*
* How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
@@ -170,8 +182,13 @@ struct socket {
* still be negative (cc > hiwat or mbcnt > mbmax). Should detect
* overflow and return 0.
*/
-#define sbspace(sb) \
- lmin((sb)->sb_hiwat - (sb)->sb_cc, (sb)->sb_mbmax - (sb)->sb_mbcnt)
+static inline long
+sbspace(struct socket *so, struct sockbuf *sb)
+{
+ KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
+ soassertlocked(so);
+ return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt);
+}
/* do we have to send all at once on a socket? */
#define sosendallatonce(so) \
@@ -190,7 +207,7 @@ struct socket {
/* can we write something to so? */
#define sowriteable(so) \
- ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \
+ ((sbspace((so), &(so)->so_snd) >= (so)->so_snd.sb_lowat && \
(((so)->so_state & SS_ISCONNECTED) || \
((so)->so_proto->pr_flags & PR_CONNREQUIRED)==0)) || \
((so)->so_state & SS_CANTSENDMORE) || (so)->so_error)
@@ -258,24 +275,24 @@ int soo_poll(struct file *fp, int events
int soo_kqfilter(struct file *fp, struct knote *kn);
int soo_close(struct file *fp, struct proc *p);
int soo_stat(struct file *, struct stat *, struct proc *);
-void sbappend(struct sockbuf *sb, struct mbuf *m);
-void sbappendstream(struct sockbuf *sb, struct mbuf *m);
-int sbappendaddr(struct sockbuf *sb, struct sockaddr *asa,
- struct mbuf *m0, struct mbuf *control);
-int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
- struct mbuf *control);
-void sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
+void sbappend(struct socket *, struct sockbuf *, struct mbuf *);
+void sbappendstream(struct socket *, struct sockbuf *, struct mbuf *);
+int sbappendaddr(struct socket *, struct sockbuf *, struct sockaddr *,
+ struct mbuf *, struct mbuf *);
+int sbappendcontrol(struct socket *, struct sockbuf *, struct mbuf *,
+ struct mbuf *);
+void sbappendrecord(struct socket *, struct sockbuf *, struct mbuf *);
void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
struct mbuf *
sbcreatecontrol(caddr_t p, int size, int type, int level);
-void sbdrop(struct sockbuf *sb, int len);
+void sbdrop(struct socket *, struct sockbuf *, int);
void sbdroprecord(struct sockbuf *sb);
-void sbflush(struct sockbuf *sb);
+void sbflush(struct socket *, struct sockbuf *);
void sbinsertoob(struct sockbuf *sb, struct mbuf *m0);
-void sbrelease(struct sockbuf *sb);
+void sbrelease(struct socket *, struct sockbuf *);
int sbcheckreserve(u_long cnt, u_long defcnt);
int sbchecklowmem(void);
-int sbreserve(struct sockbuf *sb, u_long cc);
+int sbreserve(struct socket *, struct sockbuf *, u_long);
int sbwait(struct socket *, struct sockbuf *sb);
int sb_lock(struct sockbuf *sb);
void soinit(void);
@@ -319,7 +336,6 @@ int sockargs(struct mbuf **, const void
int sosleep(struct socket *, void *, int, const char *, int);
int solock(struct socket *);
void sounlock(int);
-void soassertlocked(struct socket *);
int sendit(struct proc *, int, struct msghdr *, int, register_t *);
int recvit(struct proc *, int, struct msghdr *, caddr_t,
Index: sys/event.h
===================================================================
RCS file: /cvs/src/sys/sys/event.h,v
retrieving revision 1.25
diff -u -p -r1.25 event.h
--- sys/event.h 31 May 2017 14:52:05 -0000 1.25
+++ sys/event.h 19 Jun 2017 13:38:40 -0000
@@ -80,6 +80,13 @@ struct kevent {
#define EV_ERROR 0x4000 /* error, data contains errno */
/*
+ * hint flag for in-kernel use - must not equal any existing note
+ */
+#ifdef _KERNEL
+#define NOTE_SUBMIT 0x01000000 /* initial knote submission */
+#endif
+
+/*
* data/hint flags for EVFILT_{READ|WRITE}, shared with userspace
*/
#define NOTE_LOWAT 0x0001 /* low water mark */