The diff below removes the KERNEL_LOCK() around all pr_input() routines.
It's a bit rough, so I'd appreciate more tests before splitting it into
pieces.
I'm using tasks to delay selwakeup/csignal calls, just like I did for
bpf(4).
Questions, Comments?
Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.207
diff -u -p -r1.207 uipc_socket.c
--- kern/uipc_socket.c 4 Nov 2017 14:13:53 -0000 1.207
+++ kern/uipc_socket.c 20 Nov 2017 15:12:29 -0000
@@ -135,6 +135,8 @@ socreate(int dom, struct socket **aso, i
so->so_egid = p->p_ucred->cr_gid;
so->so_cpid = p->p_p->ps_pid;
so->so_proto = prp;
+ task_set(&so->so_rcv.sb_wtask, sorwakeup_cb, so);
+ task_set(&so->so_snd.sb_wtask, sowwakeup_cb, so);
s = solock(so);
error = (*prp->pr_attach)(so, proto);
@@ -205,6 +207,11 @@ sofree(struct socket *so)
if (!soqremque(so, 0))
return;
}
+
+ if (!task_del(systq, &so->so_rcv.sb_wtask) ||
+ !task_del(systq, &so->so_snd.sb_wtask))
+ taskq_barrier(systq);
+
#ifdef SOCKET_SPLICE
if (so->so_sp) {
if (issplicedback(so))
@@ -453,7 +460,7 @@ restart:
(atomic || space < so->so_snd.sb_lowat))) {
if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT))
snderr(EWOULDBLOCK);
- sbunlock(&so->so_snd);
+ sbunlock(so, &so->so_snd);
error = sbwait(so, &so->so_snd);
so->so_state &= ~SS_ISSENDING;
if (error)
@@ -497,7 +504,7 @@ restart:
release:
so->so_state &= ~SS_ISSENDING;
- sbunlock(&so->so_snd);
+ sbunlock(so, &so->so_snd);
out:
sounlock(s);
m_freem(top);
@@ -736,7 +743,7 @@ restart:
}
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
- sbunlock(&so->so_rcv);
+ sbunlock(so, &so->so_rcv);
error = sbwait(so, &so->so_rcv);
sounlock(s);
if (error)
@@ -957,7 +964,7 @@ dontblock:
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
error = sbwait(so, &so->so_rcv);
if (error) {
- sbunlock(&so->so_rcv);
+ sbunlock(so, &so->so_rcv);
sounlock(s);
return (0);
}
@@ -993,7 +1000,7 @@ dontblock:
}
if (orig_resid == uio->uio_resid && orig_resid &&
(flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
- sbunlock(&so->so_rcv);
+ sbunlock(so, &so->so_rcv);
sounlock(s);
goto restart;
}
@@ -1004,7 +1011,7 @@ dontblock:
if (flagsp)
*flagsp |= flags;
release:
- sbunlock(&so->so_rcv);
+ sbunlock(so, &so->so_rcv);
sounlock(s);
return (error);
}
@@ -1044,20 +1051,18 @@ sorflush(struct socket *so)
struct socket aso;
int error;
+ soassertlocked(so);
+
sb->sb_flags |= SB_NOINTR;
error = sblock(so, sb, M_WAITOK);
/* with SB_NOINTR and M_WAITOK sblock() must not fail */
KASSERT(error == 0);
socantrcvmore(so);
- sbunlock(sb);
+ sbunlock(so, sb);
aso.so_proto = pr;
aso.so_rcv = *sb;
- memset(sb, 0, sizeof (*sb));
- /* XXX - the memset stomps all over so_rcv */
- if (aso.so_rcv.sb_flags & SB_KNOTE) {
- sb->sb_sel.si_note = aso.so_rcv.sb_sel.si_note;
- sb->sb_flags = SB_KNOTE;
- }
+ memset(&sb->sb_startzero, 0,
+ (caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero);
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
(*pr->pr_domain->dom_dispose)(aso.so_rcv.sb_mb);
sbrelease(&aso, &aso.so_rcv);
@@ -1110,7 +1115,7 @@ sosplice(struct socket *so, int fd, off_
}
if (so->so_sp->ssp_socket)
sounsplice(so, so->so_sp->ssp_socket, 1);
- sbunlock(&so->so_rcv);
+ sbunlock(so, &so->so_rcv);
return (0);
}
@@ -1139,7 +1144,7 @@ sosplice(struct socket *so, int fd, off_
return (error);
}
if ((error = sblock(so, &sosp->so_snd, M_WAITOK)) != 0) {
- sbunlock(&so->so_rcv);
+ sbunlock(so, &so->so_rcv);
FRELE(fp, curproc);
return (error);
}
@@ -1183,8 +1188,8 @@ sosplice(struct socket *so, int fd, off_
}
release:
- sbunlock(&sosp->so_snd);
- sbunlock(&so->so_rcv);
+ sbunlock(sosp, &sosp->so_snd);
+ sbunlock(so, &so->so_rcv);
FRELE(fp, curproc);
return (error);
}
@@ -1544,7 +1549,8 @@ sorwakeup(struct socket *so)
if (isspliced(so))
return;
#endif
- sowakeup(so, &so->so_rcv);
+ if ((so->so_state & SS_NOFDREF) == 0)
+ task_add(systq, &so->so_rcv.sb_wtask);
if (so->so_upcall)
(*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
}
@@ -1558,7 +1564,9 @@ sowwakeup(struct socket *so)
if (so->so_snd.sb_flagsintr & SB_SPLICE)
task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask);
#endif
- sowakeup(so, &so->so_snd);
+
+ if ((so->so_state & SS_NOFDREF) == 0)
+ task_add(systq, &so->so_snd.sb_wtask);
}
int
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.86
diff -u -p -r1.86 uipc_socket2.c
--- kern/uipc_socket2.c 11 Aug 2017 21:24:19 -0000 1.86
+++ kern/uipc_socket2.c 20 Nov 2017 15:08:39 -0000
@@ -189,6 +189,8 @@ sonewconn(struct socket *head, int conns
so->so_rcv.sb_wat = head->so_rcv.sb_wat;
so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
+ task_set(&so->so_rcv.sb_wtask, sorwakeup_cb, so);
+ task_set(&so->so_snd.sb_wtask, sowwakeup_cb, so);
soqinsque(head, so, soqueue);
if ((*so->so_proto->pr_attach)(so, 0)) {
@@ -342,7 +344,6 @@ sblock(struct socket *so, struct sockbuf
{
int error, prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH;
- KERNEL_ASSERT_LOCKED();
soassertlocked(so);
if ((sb->sb_flags & SB_LOCK) == 0) {
@@ -363,15 +364,37 @@ sblock(struct socket *so, struct sockbuf
}
void
-sbunlock(struct sockbuf *sb)
+sbunlock(struct socket *so, struct sockbuf *sb)
{
- KERNEL_ASSERT_LOCKED();
+ soassertlocked(so);
sb->sb_flags &= ~SB_LOCK;
if (sb->sb_flags & SB_WANT) {
sb->sb_flags &= ~SB_WANT;
wakeup(&sb->sb_flags);
}
+}
+
+void
+sorwakeup_cb(void *xso)
+{
+ struct socket *so = xso;
+ int s;
+
+ s = solock(so);
+ sowakeup(so, &so->so_rcv);
+ sounlock(s);
+}
+
+void
+sowwakeup_cb(void *xso)
+{
+ struct socket *so = xso;
+ int s;
+
+ s = solock(so);
+ sowakeup(so, &so->so_snd);
+ sounlock(s);
}
/*
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.530
diff -u -p -r1.530 if.c
--- net/if.c 20 Nov 2017 10:16:25 -0000 1.530
+++ net/if.c 20 Nov 2017 11:47:43 -0000
@@ -933,7 +933,6 @@ if_netisr(void *unused)
{
int n, t = 0;
- KERNEL_LOCK();
NET_LOCK();
while ((n = netisr) != 0) {
@@ -947,8 +946,11 @@ if_netisr(void *unused)
atomic_clearbits_int(&netisr, n);
#if NETHER > 0
- if (n & (1 << NETISR_ARP))
+ if (n & (1 << NETISR_ARP)) {
+ KERNEL_LOCK();
arpintr();
+ KERNEL_UNLOCK();
+ }
#endif
if (n & (1 << NETISR_IP))
ipintr();
@@ -957,35 +959,52 @@ if_netisr(void *unused)
ip6intr();
#endif
#if NPPP > 0
- if (n & (1 << NETISR_PPP))
+ if (n & (1 << NETISR_PPP)) {
+ KERNEL_LOCK();
pppintr();
+ KERNEL_UNLOCK();
+ }
#endif
#if NBRIDGE > 0
- if (n & (1 << NETISR_BRIDGE))
+ if (n & (1 << NETISR_BRIDGE)) {
+ KERNEL_LOCK();
bridgeintr();
+ KERNEL_UNLOCK();
+ }
#endif
#if NSWITCH > 0
- if (n & (1 << NETISR_SWITCH))
+ if (n & (1 << NETISR_SWITCH)) {
+ KERNEL_LOCK();
switchintr();
+ KERNEL_UNLOCK();
+ }
#endif
#if NPPPOE > 0
- if (n & (1 << NETISR_PPPOE))
+ if (n & (1 << NETISR_PPPOE)) {
+ KERNEL_LOCK();
pppoeintr();
+ KERNEL_UNLOCK();
+ }
#endif
#ifdef PIPEX
- if (n & (1 << NETISR_PIPEX))
+ if (n & (1 << NETISR_PIPEX)) {
+ KERNEL_LOCK();
pipexintr();
+ KERNEL_UNLOCK();
+ }
#endif
t |= n;
}
#if NPFSYNC > 0
- if (t & (1 << NETISR_PFSYNC))
+ if (t & (1 << NETISR_PFSYNC)) {
+ KERNEL_LOCK();
pfsyncintr();
+ KERNEL_UNLOCK();
+ }
#endif
NET_UNLOCK();
- KERNEL_UNLOCK();
}
void
Index: netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.333
diff -u -p -r1.333 ip_input.c
--- netinet/ip_input.c 20 Nov 2017 10:35:24 -0000 1.333
+++ netinet/ip_input.c 20 Nov 2017 11:49:00 -0000
@@ -619,8 +619,6 @@ ip_deliver(struct mbuf **mp, int *offp,
int nest = 0;
#endif /* INET6 */
- KERNEL_ASSERT_LOCKED();
-
/* pf might have modified stuff, might have to chksum */
switch (af) {
case AF_INET:
Index: sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.77
diff -u -p -r1.77 socketvar.h
--- sys/socketvar.h 4 Nov 2017 14:13:53 -0000 1.77
+++ sys/socketvar.h 20 Nov 2017 15:08:53 -0000
@@ -98,6 +98,8 @@ struct socket {
* Variables for socket buffering.
*/
struct sockbuf {
+/* The following fields are all zeroed on flush. */
+#define sb_startzero sb_cc
u_long sb_cc; /* actual chars in buffer */
u_long sb_datacc; /* data only chars in buffer */
u_long sb_hiwat; /* max actual char count */
@@ -109,10 +111,13 @@ struct socket {
struct mbuf *sb_mbtail; /* the last mbuf in the chain */
struct mbuf *sb_lastrecord;/* first mbuf of last record in
socket buffer */
+/* End area that is zeroed on flush. */
+#define sb_endzero sb_sel
struct selinfo sb_sel; /* process selecting read/write */
int sb_flagsintr; /* flags, changed during interrupt */
short sb_flags; /* flags, see below */
u_short sb_timeo; /* timeout for read/write */
+ struct task sb_wtask; /* delay csignal() and selwakeup() */
} so_rcv, so_snd;
#define SB_MAX (2*1024*1024) /* default for max chars in
sockbuf */
#define SB_LOCK 0x01 /* lock on data queue */
@@ -244,7 +249,7 @@ soreadable(struct socket *so)
int sblock(struct socket *, struct sockbuf *, int);
/* release lock on sockbuf sb */
-void sbunlock(struct sockbuf *);
+void sbunlock(struct socket *, struct sockbuf *);
#define SB_EMPTY_FIXUP(sb) do {
\
if ((sb)->sb_mb == NULL) { \
@@ -329,6 +334,8 @@ int sosend(struct socket *so, struct mbu
int sosetopt(struct socket *so, int level, int optname, struct mbuf *m);
int soshutdown(struct socket *so, int how);
void sowakeup(struct socket *so, struct sockbuf *sb);
+void sorwakeup_cb(void *);
+void sowwakeup_cb(void *);
void sorwakeup(struct socket *);
void sowwakeup(struct socket *);
int sockargs(struct mbuf **, const void *, size_t, int);