On 27/11/17(Mon) 14:01, Alexander Bluhm wrote:
> On Mon, Nov 27, 2017 at 12:20:34PM +0100, Martin Pieuchot wrote:
> > Questions, comments, tests?
>
> New panic with regress. I think it was sys/kern/sosplice this time.
>
> login: panic: kernel diagnostic assertion "_kernel_lock_held()" failed: file
> "/usr/src/sys/kern/uipc_socket.c", line 1882
> Stopped at db_enter+0x4: popl %ebp
Thanks, I'm now grabbing the KERNEL_LOCK() in sohasoutofband() instead
of asserting that we have it.
Index: kern/sys_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_socket.c,v
retrieving revision 1.34
diff -u -p -r1.34 sys_socket.c
--- kern/sys_socket.c 14 Nov 2017 16:01:55 -0000 1.34
+++ kern/sys_socket.c 27 Nov 2017 10:46:15 -0000
@@ -166,11 +166,11 @@ soo_poll(struct file *fp, int events, st
if (revents == 0) {
if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
selrecord(p, &so->so_rcv.sb_sel);
- so->so_rcv.sb_flagsintr |= SB_SEL;
+ so->so_rcv.sb_flags |= SB_SEL;
}
if (events & (POLLOUT | POLLWRNORM)) {
selrecord(p, &so->so_snd.sb_sel);
- so->so_snd.sb_flagsintr |= SB_SEL;
+ so->so_snd.sb_flags |= SB_SEL;
}
}
sounlock(s);
Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.209
diff -u -p -r1.209 uipc_socket.c
--- kern/uipc_socket.c 23 Nov 2017 13:45:46 -0000 1.209
+++ kern/uipc_socket.c 28 Nov 2017 13:27:09 -0000
@@ -135,6 +135,8 @@ socreate(int dom, struct socket **aso, i
so->so_egid = p->p_ucred->cr_gid;
so->so_cpid = p->p_p->ps_pid;
so->so_proto = prp;
+ task_set(&so->so_rcv.sb_wtask, sorwakeup_cb, so);
+ task_set(&so->so_snd.sb_wtask, sowwakeup_cb, so);
s = solock(so);
error = (*prp->pr_attach)(so, proto);
@@ -194,7 +196,8 @@ sofree(struct socket *so)
{
soassertlocked(so);
- if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
+ if ((so->so_pcb != NULL) ||
+ (so->so_state & (SS_NOFDREF|SS_NONOTIF)) != (SS_NOFDREF|SS_NONOTIF))
return;
if (so->so_head) {
/*
@@ -273,8 +276,15 @@ drop:
error = error2;
}
discard:
- if (so->so_state & SS_NOFDREF)
- panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
+ KASSERT((so->so_state & (SS_NOFDREF|SS_NONOTIF)) == 0);
+ /* Make sure possible delayed notification are delivered. */
+ so->so_state |= SS_NONOTIF;
+ if (!task_del(systq, &so->so_rcv.sb_wtask) ||
+ !task_del(systq, &so->so_snd.sb_wtask)) {
+ NET_UNLOCK();
+ taskq_barrier(systq);
+ NET_LOCK();
+ }
so->so_state |= SS_NOFDREF;
sofree(so);
sounlock(s);
@@ -296,9 +306,8 @@ soaccept(struct socket *so, struct mbuf
int error = 0;
soassertlocked(so);
+ KASSERT(so->so_state & SS_NOFDREF);
- if ((so->so_state & SS_NOFDREF) == 0)
- panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
so->so_state &= ~SS_NOFDREF;
if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
(so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
@@ -1052,12 +1061,8 @@ sorflush(struct socket *so)
sbunlock(so, sb);
aso.so_proto = pr;
aso.so_rcv = *sb;
- memset(sb, 0, sizeof (*sb));
- /* XXX - the memset stomps all over so_rcv */
- if (aso.so_rcv.sb_flags & SB_KNOTE) {
- sb->sb_sel.si_note = aso.so_rcv.sb_sel.si_note;
- sb->sb_flags = SB_KNOTE;
- }
+ memset(&sb->sb_startzero, 0,
+ (caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero);
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
(*pr->pr_domain->dom_dispose)(aso.so_rcv.sb_mb);
sbrelease(&aso, &aso.so_rcv);
@@ -1178,8 +1183,8 @@ sosplice(struct socket *so, int fd, off_
* we sleep, the socket buffers are not marked as spliced yet.
*/
if (somove(so, M_WAIT)) {
- so->so_rcv.sb_flagsintr |= SB_SPLICE;
- sosp->so_snd.sb_flagsintr |= SB_SPLICE;
+ so->so_rcv.sb_flags |= SB_SPLICE;
+ sosp->so_snd.sb_flags |= SB_SPLICE;
}
release:
@@ -1196,8 +1201,8 @@ sounsplice(struct socket *so, struct soc
task_del(sosplice_taskq, &so->so_splicetask);
timeout_del(&so->so_idleto);
- sosp->so_snd.sb_flagsintr &= ~SB_SPLICE;
- so->so_rcv.sb_flagsintr &= ~SB_SPLICE;
+ sosp->so_snd.sb_flags &= ~SB_SPLICE;
+ so->so_rcv.sb_flags &= ~SB_SPLICE;
so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
if (wakeup && soreadable(so))
sorwakeup(so);
@@ -1210,7 +1215,7 @@ soidle(void *arg)
int s;
s = solock(so);
- if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
+ if (so->so_rcv.sb_flags & SB_SPLICE) {
so->so_error = ETIMEDOUT;
sounsplice(so, so->so_sp->ssp_socket, 1);
}
@@ -1224,7 +1229,7 @@ sotask(void *arg)
int s;
s = solock(so);
- if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
+ if (so->so_rcv.sb_flags & SB_SPLICE) {
/*
* We may not sleep here as sofree() and unsplice() may be
* called from softnet interrupt context. This would remove
@@ -1527,7 +1532,7 @@ sorwakeup(struct socket *so)
soassertlocked(so);
#ifdef SOCKET_SPLICE
- if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
+ if (so->so_rcv.sb_flags & SB_SPLICE) {
/*
* TCP has a sendbuffer that can handle multiple packets
* at once. So queue the stream a bit to accumulate data.
@@ -1544,7 +1549,8 @@ sorwakeup(struct socket *so)
if (isspliced(so))
return;
#endif
- sowakeup(so, &so->so_rcv);
+ if ((so->so_state & SS_NONOTIF) == 0)
+ task_add(systq, &so->so_rcv.sb_wtask);
if (so->so_upcall)
(*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
}
@@ -1555,10 +1561,12 @@ sowwakeup(struct socket *so)
soassertlocked(so);
#ifdef SOCKET_SPLICE
- if (so->so_snd.sb_flagsintr & SB_SPLICE)
+ if (so->so_snd.sb_flags & SB_SPLICE)
task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask);
#endif
- sowakeup(so, &so->so_snd);
+
+ if ((so->so_state & SS_NONOTIF) == 0)
+ task_add(systq, &so->so_snd.sb_wtask);
}
int
@@ -1871,9 +1879,10 @@ sogetopt(struct socket *so, int level, i
void
sohasoutofband(struct socket *so)
{
- KERNEL_ASSERT_LOCKED();
+ KERNEL_LOCK();
csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid);
selwakeup(&so->so_rcv.sb_sel);
+ KERNEL_UNLOCK();
}
int
@@ -2025,7 +2034,6 @@ sobuf_print(struct sockbuf *sb,
(*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
(*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
(*pr)("\tsb_sel: ...\n");
- (*pr)("\tsb_flagsintr: %d\n", sb->sb_flagsintr);
(*pr)("\tsb_flags: %i\n", sb->sb_flags);
(*pr)("\tsb_timeo: %i\n", sb->sb_timeo);
}
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.87
diff -u -p -r1.87 uipc_socket2.c
--- kern/uipc_socket2.c 23 Nov 2017 13:42:53 -0000 1.87
+++ kern/uipc_socket2.c 27 Nov 2017 11:10:57 -0000
@@ -189,6 +189,8 @@ sonewconn(struct socket *head, int conns
so->so_rcv.sb_wat = head->so_rcv.sb_wat;
so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
+ task_set(&so->so_rcv.sb_wtask, sorwakeup_cb, so);
+ task_set(&so->so_snd.sb_wtask, sowwakeup_cb, so);
soqinsque(head, so, soqueue);
if ((*so->so_proto->pr_attach)(so, 0)) {
@@ -329,12 +331,12 @@ sosleep(struct socket *so, void *ident,
int
sbwait(struct socket *so, struct sockbuf *sb)
{
+ int prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH;
+
soassertlocked(so);
- sb->sb_flagsintr |= SB_WAIT;
- return (sosleep(so, &sb->sb_cc,
- (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "netio",
- sb->sb_timeo));
+ sb->sb_flags |= SB_WAIT;
+ return (sosleep(so, &sb->sb_cc, prio, "netio", sb->sb_timeo));
}
int
@@ -351,6 +353,9 @@ sblock(struct socket *so, struct sockbuf
if (wait & M_NOWAIT)
return (EWOULDBLOCK);
+ /* XXXSMP ensure sosleep() isn't called from input path. */
+ KERNEL_ASSERT_LOCKED();
+
while (sb->sb_flags & SB_LOCK) {
sb->sb_flags |= SB_WANT;
error = sosleep(so, &sb->sb_flags, prio, "netlck", 0);
@@ -364,13 +369,35 @@ sblock(struct socket *so, struct sockbuf
void
sbunlock(struct socket *so, struct sockbuf *sb)
{
+ int flags = sb->sb_flags;
+
soassertlocked(so);
- sb->sb_flags &= ~SB_LOCK;
- if (sb->sb_flags & SB_WANT) {
- sb->sb_flags &= ~SB_WANT;
+ sb->sb_flags &= ~(SB_LOCK|SB_WANT);
+ if (flags & SB_WANT)
wakeup(&sb->sb_flags);
- }
+}
+
+void
+sorwakeup_cb(void *xso)
+{
+ struct socket *so = xso;
+ int s;
+
+ s = solock(so);
+ sowakeup(so, &so->so_rcv);
+ sounlock(s);
+}
+
+void
+sowwakeup_cb(void *xso)
+{
+ struct socket *so = xso;
+ int s;
+
+ s = solock(so);
+ sowakeup(so, &so->so_snd);
+ sounlock(s);
}
/*
@@ -381,15 +408,15 @@ sbunlock(struct socket *so, struct sockb
void
sowakeup(struct socket *so, struct sockbuf *sb)
{
+ int flags = sb->sb_flags;
+
KERNEL_ASSERT_LOCKED();
soassertlocked(so);
selwakeup(&sb->sb_sel);
- sb->sb_flagsintr &= ~SB_SEL;
- if (sb->sb_flagsintr & SB_WAIT) {
- sb->sb_flagsintr &= ~SB_WAIT;
+ sb->sb_flags &= ~(SB_SEL|SB_WAIT);
+ if (flags & SB_WAIT)
wakeup(&sb->sb_cc);
- }
if (so->so_state & SS_ASYNC)
csignal(so->so_pgid, SIGIO, so->so_siguid, so->so_sigeuid);
}
Index: miscfs/fifofs/fifo_vnops.c
===================================================================
RCS file: /cvs/src/sys/miscfs/fifofs/fifo_vnops.c,v
retrieving revision 1.59
diff -u -p -r1.59 fifo_vnops.c
--- miscfs/fifofs/fifo_vnops.c 4 Nov 2017 14:13:53 -0000 1.59
+++ miscfs/fifofs/fifo_vnops.c 27 Nov 2017 10:48:00 -0000
@@ -333,11 +333,11 @@ fifo_poll(void *v)
events = POLLIN;
if (events & (POLLIN | POLLRDNORM)) {
selrecord(ap->a_p, &rso->so_rcv.sb_sel);
- rso->so_rcv.sb_flagsintr |= SB_SEL;
+ rso->so_rcv.sb_flags |= SB_SEL;
}
if (events & (POLLOUT | POLLWRNORM)) {
selrecord(ap->a_p, &wso->so_snd.sb_sel);
- wso->so_snd.sb_flagsintr |= SB_SEL;
+ wso->so_snd.sb_flags |= SB_SEL;
}
}
return (revents);
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.530
diff -u -p -r1.530 if.c
--- net/if.c 20 Nov 2017 10:16:25 -0000 1.530
+++ net/if.c 27 Nov 2017 10:13:22 -0000
@@ -933,7 +933,6 @@ if_netisr(void *unused)
{
int n, t = 0;
- KERNEL_LOCK();
NET_LOCK();
while ((n = netisr) != 0) {
@@ -947,8 +946,11 @@ if_netisr(void *unused)
atomic_clearbits_int(&netisr, n);
#if NETHER > 0
- if (n & (1 << NETISR_ARP))
+ if (n & (1 << NETISR_ARP)) {
+ KERNEL_LOCK();
arpintr();
+ KERNEL_UNLOCK();
+ }
#endif
if (n & (1 << NETISR_IP))
ipintr();
@@ -957,35 +959,52 @@ if_netisr(void *unused)
ip6intr();
#endif
#if NPPP > 0
- if (n & (1 << NETISR_PPP))
+ if (n & (1 << NETISR_PPP)) {
+ KERNEL_LOCK();
pppintr();
+ KERNEL_UNLOCK();
+ }
#endif
#if NBRIDGE > 0
- if (n & (1 << NETISR_BRIDGE))
+ if (n & (1 << NETISR_BRIDGE)) {
+ KERNEL_LOCK();
bridgeintr();
+ KERNEL_UNLOCK();
+ }
#endif
#if NSWITCH > 0
- if (n & (1 << NETISR_SWITCH))
+ if (n & (1 << NETISR_SWITCH)) {
+ KERNEL_LOCK();
switchintr();
+ KERNEL_UNLOCK();
+ }
#endif
#if NPPPOE > 0
- if (n & (1 << NETISR_PPPOE))
+ if (n & (1 << NETISR_PPPOE)) {
+ KERNEL_LOCK();
pppoeintr();
+ KERNEL_UNLOCK();
+ }
#endif
#ifdef PIPEX
- if (n & (1 << NETISR_PIPEX))
+ if (n & (1 << NETISR_PIPEX)) {
+ KERNEL_LOCK();
pipexintr();
+ KERNEL_UNLOCK();
+ }
#endif
t |= n;
}
#if NPFSYNC > 0
- if (t & (1 << NETISR_PFSYNC))
+ if (t & (1 << NETISR_PFSYNC)) {
+ KERNEL_LOCK();
pfsyncintr();
+ KERNEL_UNLOCK();
+ }
#endif
NET_UNLOCK();
- KERNEL_UNLOCK();
}
void
Index: sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.79
diff -u -p -r1.79 socketvar.h
--- sys/socketvar.h 23 Nov 2017 13:45:46 -0000 1.79
+++ sys/socketvar.h 27 Nov 2017 10:48:45 -0000
@@ -98,6 +98,8 @@ struct socket {
* Variables for socket buffering.
*/
struct sockbuf {
+/* The following fields are all zeroed on flush. */
+#define sb_startzero sb_cc
u_long sb_cc; /* actual chars in buffer */
u_long sb_datacc; /* data only chars in buffer */
u_long sb_hiwat; /* max actual char count */
@@ -109,10 +111,13 @@ struct socket {
struct mbuf *sb_mbtail; /* the last mbuf in the chain */
struct mbuf *sb_lastrecord;/* first mbuf of last record in
socket buffer */
+/* End area that is zeroed on flush. */
+#define sb_endzero sb_sel
struct selinfo sb_sel; /* process selecting read/write */
- int sb_flagsintr; /* flags, changed during interrupt */
- short sb_flags; /* flags, see below */
+ int sb_flags ; /* flags, see below */
+ short __pad;
u_short sb_timeo; /* timeout for read/write */
+ struct task sb_wtask; /* delay csignal() and selwakeup() */
} so_rcv, so_snd;
#define SB_MAX (2*1024*1024) /* default for max chars in
sockbuf */
#define SB_LOCK 0x01 /* lock on data queue */
@@ -149,6 +154,7 @@ struct socket {
#define SS_CONNECTOUT 0x1000 /* connect, not accept, at this
end */
#define SS_ISSENDING 0x2000 /* hint for lower layer */
#define SS_DNS 0x4000 /* created using SOCK_DNS
socket(2) */
+#define SS_NONOTIF 0x8000 /* no notification allowed */
#ifdef _KERNEL
@@ -169,7 +175,7 @@ void soassertlocked(struct socket *);
static inline int
sb_notify(struct socket *so, struct sockbuf *sb)
{
- int flags = (sb->sb_flags | sb->sb_flagsintr);
+ int flags = sb->sb_flags;
KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
soassertlocked(so);
@@ -329,6 +335,8 @@ int sosend(struct socket *so, struct mbu
int sosetopt(struct socket *so, int level, int optname, struct mbuf *m);
int soshutdown(struct socket *so, int how);
void sowakeup(struct socket *so, struct sockbuf *sb);
+void sorwakeup_cb(void *);
+void sowwakeup_cb(void *);
void sorwakeup(struct socket *);
void sowwakeup(struct socket *);
int sockargs(struct mbuf **, const void *, size_t, int);