On Mon, Nov 27, 2017 at 12:20:34PM +0100, Martin Pieuchot wrote:
> Questions, comments, tests?
New panic with regress. I think it was sys/kern/sosplice this time.
login: panic: kernel diagnostic assertion "_kernel_lock_held()" failed: file
"/usr/src/sys/kern/uipc_socket.c", line 1882
Stopped at db_enter+0x4: popl %ebp
TID PID UID PRFLAGS PFLAGS CPU COMMAND
124975 87761 0 0x2 0 0 perl
* 64725 54083 0 0x14000 0x200 1 softnet
db_enter() at db_enter+0x4
panic() at panic+0xcc
__assert(d09ca4c9,d0b8b601,75a,d0a6b5e8) at __assert+0x19
sohasoutofband(d77dea8c) at sohasoutofband+0x4b
tcp_input(f547e27c,f547e278,6,2) at tcp_input+0x2ba2
ip_deliver(f547e27c,f547e278,6,2) at ip_deliver+0x21f
ip_local(f547e27c,f547e278,d06dcd6a,0) at ip_local+0x139
ipintr() at ipintr+0x54
if_netisr(0) at if_netisr+0x5a
taskq_thread(d7810080) at taskq_thread+0x6c
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports. Insufficient info makes it difficult to find and fix bugs.
bluhm
> Index: kern/sys_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_socket.c,v
> retrieving revision 1.34
> diff -u -p -r1.34 sys_socket.c
> --- kern/sys_socket.c 14 Nov 2017 16:01:55 -0000 1.34
> +++ kern/sys_socket.c 27 Nov 2017 10:46:15 -0000
> @@ -166,11 +166,11 @@ soo_poll(struct file *fp, int events, st
> if (revents == 0) {
> if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
> selrecord(p, &so->so_rcv.sb_sel);
> - so->so_rcv.sb_flagsintr |= SB_SEL;
> + so->so_rcv.sb_flags |= SB_SEL;
> }
> if (events & (POLLOUT | POLLWRNORM)) {
> selrecord(p, &so->so_snd.sb_sel);
> - so->so_snd.sb_flagsintr |= SB_SEL;
> + so->so_snd.sb_flags |= SB_SEL;
> }
> }
> sounlock(s);
> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.209
> diff -u -p -r1.209 uipc_socket.c
> --- kern/uipc_socket.c 23 Nov 2017 13:45:46 -0000 1.209
> +++ kern/uipc_socket.c 27 Nov 2017 10:53:44 -0000
> @@ -135,6 +135,8 @@ socreate(int dom, struct socket **aso, i
> so->so_egid = p->p_ucred->cr_gid;
> so->so_cpid = p->p_p->ps_pid;
> so->so_proto = prp;
> + task_set(&so->so_rcv.sb_wtask, sorwakeup_cb, so);
> + task_set(&so->so_snd.sb_wtask, sowwakeup_cb, so);
>
> s = solock(so);
> error = (*prp->pr_attach)(so, proto);
> @@ -194,7 +196,8 @@ sofree(struct socket *so)
> {
> soassertlocked(so);
>
> - if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
> + if ((so->so_pcb != NULL) ||
> + (so->so_state & (SS_NOFDREF|SS_NONOTIF)) != (SS_NOFDREF|SS_NONOTIF))
> return;
> if (so->so_head) {
> /*
> @@ -273,8 +276,15 @@ drop:
> error = error2;
> }
> discard:
> - if (so->so_state & SS_NOFDREF)
> - panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
> + KASSERT((so->so_state & (SS_NOFDREF|SS_NONOTIF)) == 0);
> + /* Make sure possible delayed notification are delivered. */
> + so->so_state |= SS_NONOTIF;
> + if (!task_del(systq, &so->so_rcv.sb_wtask) ||
> + !task_del(systq, &so->so_snd.sb_wtask)) {
> + NET_UNLOCK();
> + taskq_barrier(systq);
> + NET_LOCK();
> + }
> so->so_state |= SS_NOFDREF;
> sofree(so);
> sounlock(s);
> @@ -296,9 +306,8 @@ soaccept(struct socket *so, struct mbuf
> int error = 0;
>
> soassertlocked(so);
> + KASSERT(so->so_state & SS_NOFDREF);
>
> - if ((so->so_state & SS_NOFDREF) == 0)
> - panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
> so->so_state &= ~SS_NOFDREF;
> if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
> (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
> @@ -1052,12 +1061,8 @@ sorflush(struct socket *so)
> sbunlock(so, sb);
> aso.so_proto = pr;
> aso.so_rcv = *sb;
> - memset(sb, 0, sizeof (*sb));
> - /* XXX - the memset stomps all over so_rcv */
> - if (aso.so_rcv.sb_flags & SB_KNOTE) {
> - sb->sb_sel.si_note = aso.so_rcv.sb_sel.si_note;
> - sb->sb_flags = SB_KNOTE;
> - }
> + memset(&sb->sb_startzero, 0,
> + (caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero);
> if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
> (*pr->pr_domain->dom_dispose)(aso.so_rcv.sb_mb);
> sbrelease(&aso, &aso.so_rcv);
> @@ -1178,8 +1183,8 @@ sosplice(struct socket *so, int fd, off_
> * we sleep, the socket buffers are not marked as spliced yet.
> */
> if (somove(so, M_WAIT)) {
> - so->so_rcv.sb_flagsintr |= SB_SPLICE;
> - sosp->so_snd.sb_flagsintr |= SB_SPLICE;
> + so->so_rcv.sb_flags |= SB_SPLICE;
> + sosp->so_snd.sb_flags |= SB_SPLICE;
> }
>
> release:
> @@ -1196,8 +1201,8 @@ sounsplice(struct socket *so, struct soc
>
> task_del(sosplice_taskq, &so->so_splicetask);
> timeout_del(&so->so_idleto);
> - sosp->so_snd.sb_flagsintr &= ~SB_SPLICE;
> - so->so_rcv.sb_flagsintr &= ~SB_SPLICE;
> + sosp->so_snd.sb_flags &= ~SB_SPLICE;
> + so->so_rcv.sb_flags &= ~SB_SPLICE;
> so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
> if (wakeup && soreadable(so))
> sorwakeup(so);
> @@ -1210,7 +1215,7 @@ soidle(void *arg)
> int s;
>
> s = solock(so);
> - if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
> + if (so->so_rcv.sb_flags & SB_SPLICE) {
> so->so_error = ETIMEDOUT;
> sounsplice(so, so->so_sp->ssp_socket, 1);
> }
> @@ -1224,7 +1229,7 @@ sotask(void *arg)
> int s;
>
> s = solock(so);
> - if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
> + if (so->so_rcv.sb_flags & SB_SPLICE) {
> /*
> * We may not sleep here as sofree() and unsplice() may be
> * called from softnet interrupt context. This would remove
> @@ -1527,7 +1532,7 @@ sorwakeup(struct socket *so)
> soassertlocked(so);
>
> #ifdef SOCKET_SPLICE
> - if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
> + if (so->so_rcv.sb_flags & SB_SPLICE) {
> /*
> * TCP has a sendbuffer that can handle multiple packets
> * at once. So queue the stream a bit to accumulate data.
> @@ -1544,7 +1549,8 @@ sorwakeup(struct socket *so)
> if (isspliced(so))
> return;
> #endif
> - sowakeup(so, &so->so_rcv);
> + if ((so->so_state & SS_NONOTIF) == 0)
> + task_add(systq, &so->so_rcv.sb_wtask);
> if (so->so_upcall)
> (*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
> }
> @@ -1555,10 +1561,12 @@ sowwakeup(struct socket *so)
> soassertlocked(so);
>
> #ifdef SOCKET_SPLICE
> - if (so->so_snd.sb_flagsintr & SB_SPLICE)
> + if (so->so_snd.sb_flags & SB_SPLICE)
> task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask);
> #endif
> - sowakeup(so, &so->so_snd);
> +
> + if ((so->so_state & SS_NONOTIF) == 0)
> + task_add(systq, &so->so_snd.sb_wtask);
> }
>
> int
> @@ -2025,7 +2033,6 @@ sobuf_print(struct sockbuf *sb,
> (*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
> (*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
> (*pr)("\tsb_sel: ...\n");
> - (*pr)("\tsb_flagsintr: %d\n", sb->sb_flagsintr);
> (*pr)("\tsb_flags: %i\n", sb->sb_flags);
> (*pr)("\tsb_timeo: %i\n", sb->sb_timeo);
> }
> Index: kern/uipc_socket2.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.87
> diff -u -p -r1.87 uipc_socket2.c
> --- kern/uipc_socket2.c 23 Nov 2017 13:42:53 -0000 1.87
> +++ kern/uipc_socket2.c 27 Nov 2017 11:10:57 -0000
> @@ -189,6 +189,8 @@ sonewconn(struct socket *head, int conns
> so->so_rcv.sb_wat = head->so_rcv.sb_wat;
> so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
> so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
> + task_set(&so->so_rcv.sb_wtask, sorwakeup_cb, so);
> + task_set(&so->so_snd.sb_wtask, sowwakeup_cb, so);
>
> soqinsque(head, so, soqueue);
> if ((*so->so_proto->pr_attach)(so, 0)) {
> @@ -329,12 +331,12 @@ sosleep(struct socket *so, void *ident,
> int
> sbwait(struct socket *so, struct sockbuf *sb)
> {
> + int prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH;
> +
> soassertlocked(so);
>
> - sb->sb_flagsintr |= SB_WAIT;
> - return (sosleep(so, &sb->sb_cc,
> - (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "netio",
> - sb->sb_timeo));
> + sb->sb_flags |= SB_WAIT;
> + return (sosleep(so, &sb->sb_cc, prio, "netio", sb->sb_timeo));
> }
>
> int
> @@ -351,6 +353,9 @@ sblock(struct socket *so, struct sockbuf
> if (wait & M_NOWAIT)
> return (EWOULDBLOCK);
>
> + /* XXXSMP ensure sosleep() isn't called from input path. */
> + KERNEL_ASSERT_LOCKED();
> +
> while (sb->sb_flags & SB_LOCK) {
> sb->sb_flags |= SB_WANT;
> error = sosleep(so, &sb->sb_flags, prio, "netlck", 0);
> @@ -364,13 +369,35 @@ sblock(struct socket *so, struct sockbuf
> void
> sbunlock(struct socket *so, struct sockbuf *sb)
> {
> + int flags = sb->sb_flags;
> +
> soassertlocked(so);
>
> - sb->sb_flags &= ~SB_LOCK;
> - if (sb->sb_flags & SB_WANT) {
> - sb->sb_flags &= ~SB_WANT;
> + sb->sb_flags &= ~(SB_LOCK|SB_WANT);
> + if (flags & SB_WANT)
> wakeup(&sb->sb_flags);
> - }
> +}
> +
> +void
> +sorwakeup_cb(void *xso)
> +{
> + struct socket *so = xso;
> + int s;
> +
> + s = solock(so);
> + sowakeup(so, &so->so_rcv);
> + sounlock(s);
> +}
> +
> +void
> +sowwakeup_cb(void *xso)
> +{
> + struct socket *so = xso;
> + int s;
> +
> + s = solock(so);
> + sowakeup(so, &so->so_snd);
> + sounlock(s);
> }
>
> /*
> @@ -381,15 +408,15 @@ sbunlock(struct socket *so, struct sockb
> void
> sowakeup(struct socket *so, struct sockbuf *sb)
> {
> + int flags = sb->sb_flags;
> +
> KERNEL_ASSERT_LOCKED();
> soassertlocked(so);
>
> selwakeup(&sb->sb_sel);
> - sb->sb_flagsintr &= ~SB_SEL;
> - if (sb->sb_flagsintr & SB_WAIT) {
> - sb->sb_flagsintr &= ~SB_WAIT;
> + sb->sb_flags &= ~(SB_SEL|SB_WAIT);
> + if (flags & SB_WAIT)
> wakeup(&sb->sb_cc);
> - }
> if (so->so_state & SS_ASYNC)
> csignal(so->so_pgid, SIGIO, so->so_siguid, so->so_sigeuid);
> }
> Index: miscfs/fifofs/fifo_vnops.c
> ===================================================================
> RCS file: /cvs/src/sys/miscfs/fifofs/fifo_vnops.c,v
> retrieving revision 1.59
> diff -u -p -r1.59 fifo_vnops.c
> --- miscfs/fifofs/fifo_vnops.c 4 Nov 2017 14:13:53 -0000 1.59
> +++ miscfs/fifofs/fifo_vnops.c 27 Nov 2017 10:48:00 -0000
> @@ -333,11 +333,11 @@ fifo_poll(void *v)
> events = POLLIN;
> if (events & (POLLIN | POLLRDNORM)) {
> selrecord(ap->a_p, &rso->so_rcv.sb_sel);
> - rso->so_rcv.sb_flagsintr |= SB_SEL;
> + rso->so_rcv.sb_flags |= SB_SEL;
> }
> if (events & (POLLOUT | POLLWRNORM)) {
> selrecord(ap->a_p, &wso->so_snd.sb_sel);
> - wso->so_snd.sb_flagsintr |= SB_SEL;
> + wso->so_snd.sb_flags |= SB_SEL;
> }
> }
> return (revents);
> Index: net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.530
> diff -u -p -r1.530 if.c
> --- net/if.c 20 Nov 2017 10:16:25 -0000 1.530
> +++ net/if.c 27 Nov 2017 10:13:22 -0000
> @@ -933,7 +933,6 @@ if_netisr(void *unused)
> {
> int n, t = 0;
>
> - KERNEL_LOCK();
> NET_LOCK();
>
> while ((n = netisr) != 0) {
> @@ -947,8 +946,11 @@ if_netisr(void *unused)
> atomic_clearbits_int(&netisr, n);
>
> #if NETHER > 0
> - if (n & (1 << NETISR_ARP))
> + if (n & (1 << NETISR_ARP)) {
> + KERNEL_LOCK();
> arpintr();
> + KERNEL_UNLOCK();
> + }
> #endif
> if (n & (1 << NETISR_IP))
> ipintr();
> @@ -957,35 +959,52 @@ if_netisr(void *unused)
> ip6intr();
> #endif
> #if NPPP > 0
> - if (n & (1 << NETISR_PPP))
> + if (n & (1 << NETISR_PPP)) {
> + KERNEL_LOCK();
> pppintr();
> + KERNEL_UNLOCK();
> + }
> #endif
> #if NBRIDGE > 0
> - if (n & (1 << NETISR_BRIDGE))
> + if (n & (1 << NETISR_BRIDGE)) {
> + KERNEL_LOCK();
> bridgeintr();
> + KERNEL_UNLOCK();
> + }
> #endif
> #if NSWITCH > 0
> - if (n & (1 << NETISR_SWITCH))
> + if (n & (1 << NETISR_SWITCH)) {
> + KERNEL_LOCK();
> switchintr();
> + KERNEL_UNLOCK();
> + }
> #endif
> #if NPPPOE > 0
> - if (n & (1 << NETISR_PPPOE))
> + if (n & (1 << NETISR_PPPOE)) {
> + KERNEL_LOCK();
> pppoeintr();
> + KERNEL_UNLOCK();
> + }
> #endif
> #ifdef PIPEX
> - if (n & (1 << NETISR_PIPEX))
> + if (n & (1 << NETISR_PIPEX)) {
> + KERNEL_LOCK();
> pipexintr();
> + KERNEL_UNLOCK();
> + }
> #endif
> t |= n;
> }
>
> #if NPFSYNC > 0
> - if (t & (1 << NETISR_PFSYNC))
> + if (t & (1 << NETISR_PFSYNC)) {
> + KERNEL_LOCK();
> pfsyncintr();
> + KERNEL_UNLOCK();
> + }
> #endif
>
> NET_UNLOCK();
> - KERNEL_UNLOCK();
> }
>
> void
> Index: sys/socketvar.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/socketvar.h,v
> retrieving revision 1.79
> diff -u -p -r1.79 socketvar.h
> --- sys/socketvar.h 23 Nov 2017 13:45:46 -0000 1.79
> +++ sys/socketvar.h 27 Nov 2017 10:48:45 -0000
> @@ -98,6 +98,8 @@ struct socket {
> * Variables for socket buffering.
> */
> struct sockbuf {
> +/* The following fields are all zeroed on flush. */
> +#define sb_startzero sb_cc
> u_long sb_cc; /* actual chars in buffer */
> u_long sb_datacc; /* data only chars in buffer */
> u_long sb_hiwat; /* max actual char count */
> @@ -109,10 +111,13 @@ struct socket {
> struct mbuf *sb_mbtail; /* the last mbuf in the chain */
> struct mbuf *sb_lastrecord;/* first mbuf of last record in
> socket buffer */
> +/* End area that is zeroed on flush. */
> +#define sb_endzero sb_sel
> struct selinfo sb_sel; /* process selecting read/write */
> - int sb_flagsintr; /* flags, changed during interrupt */
> - short sb_flags; /* flags, see below */
> + int sb_flags ; /* flags, see below */
> + short __pad;
> u_short sb_timeo; /* timeout for read/write */
> + struct task sb_wtask; /* delay csignal() and selwakeup() */
> } so_rcv, so_snd;
> #define SB_MAX (2*1024*1024) /* default for max chars in
> sockbuf */
> #define SB_LOCK 0x01 /* lock on data queue */
> @@ -149,6 +154,7 @@ struct socket {
> #define SS_CONNECTOUT 0x1000 /* connect, not accept, at this
> end */
> #define SS_ISSENDING 0x2000 /* hint for lower layer */
> #define SS_DNS 0x4000 /* created using SOCK_DNS
> socket(2) */
> +#define SS_NONOTIF 0x8000 /* no notification allowed */
>
> #ifdef _KERNEL
>
> @@ -169,7 +175,7 @@ void soassertlocked(struct socket *);
> static inline int
> sb_notify(struct socket *so, struct sockbuf *sb)
> {
> - int flags = (sb->sb_flags | sb->sb_flagsintr);
> + int flags = sb->sb_flags;
>
> KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
> soassertlocked(so);
> @@ -329,6 +335,8 @@ int sosend(struct socket *so, struct mbu
> int sosetopt(struct socket *so, int level, int optname, struct mbuf *m);
> int soshutdown(struct socket *so, int how);
> void sowakeup(struct socket *so, struct sockbuf *sb);
> +void sorwakeup_cb(void *);
> +void sowwakeup_cb(void *);
> void sorwakeup(struct socket *);
> void sowwakeup(struct socket *);
> int sockargs(struct mbuf **, const void *, size_t, int);