The diff below removes the KERNEL_LOCK() around all pr_input() routines.
It's a bit rough, so I'd appreciate more tests before splitting it into
pieces.

I'm using tasks to delay selwakeup/csignal calls, just like I did for
bpf(4).

Questions, Comments?

Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.207
diff -u -p -r1.207 uipc_socket.c
--- kern/uipc_socket.c  4 Nov 2017 14:13:53 -0000       1.207
+++ kern/uipc_socket.c  20 Nov 2017 15:12:29 -0000
@@ -135,6 +135,8 @@ socreate(int dom, struct socket **aso, i
        so->so_egid = p->p_ucred->cr_gid;
        so->so_cpid = p->p_p->ps_pid;
        so->so_proto = prp;
+       task_set(&so->so_rcv.sb_wtask, sorwakeup_cb, so);
+       task_set(&so->so_snd.sb_wtask, sowwakeup_cb, so);
 
        s = solock(so);
        error = (*prp->pr_attach)(so, proto);
@@ -205,6 +207,11 @@ sofree(struct socket *so)
                if (!soqremque(so, 0))
                        return;
        }
+
+       if (!task_del(systq, &so->so_rcv.sb_wtask) ||
+           !task_del(systq, &so->so_snd.sb_wtask))
+               taskq_barrier(systq);
+
 #ifdef SOCKET_SPLICE
        if (so->so_sp) {
                if (issplicedback(so))
@@ -453,7 +460,7 @@ restart:
                    (atomic || space < so->so_snd.sb_lowat))) {
                        if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT))
                                snderr(EWOULDBLOCK);
-                       sbunlock(&so->so_snd);
+                       sbunlock(so, &so->so_snd);
                        error = sbwait(so, &so->so_snd);
                        so->so_state &= ~SS_ISSENDING;
                        if (error)
@@ -497,7 +504,7 @@ restart:
 
 release:
        so->so_state &= ~SS_ISSENDING;
-       sbunlock(&so->so_snd);
+       sbunlock(so, &so->so_snd);
 out:
        sounlock(s);
        m_freem(top);
@@ -736,7 +743,7 @@ restart:
                }
                SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
                SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
-               sbunlock(&so->so_rcv);
+               sbunlock(so, &so->so_rcv);
                error = sbwait(so, &so->so_rcv);
                sounlock(s);
                if (error)
@@ -957,7 +964,7 @@ dontblock:
                        SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
                        error = sbwait(so, &so->so_rcv);
                        if (error) {
-                               sbunlock(&so->so_rcv);
+                               sbunlock(so, &so->so_rcv);
                                sounlock(s);
                                return (0);
                        }
@@ -993,7 +1000,7 @@ dontblock:
        }
        if (orig_resid == uio->uio_resid && orig_resid &&
            (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
-               sbunlock(&so->so_rcv);
+               sbunlock(so, &so->so_rcv);
                sounlock(s);
                goto restart;
        }
@@ -1004,7 +1011,7 @@ dontblock:
        if (flagsp)
                *flagsp |= flags;
 release:
-       sbunlock(&so->so_rcv);
+       sbunlock(so, &so->so_rcv);
        sounlock(s);
        return (error);
 }
@@ -1044,20 +1051,18 @@ sorflush(struct socket *so)
        struct socket aso;
        int error;
 
+       soassertlocked(so);
+
        sb->sb_flags |= SB_NOINTR;
        error = sblock(so, sb, M_WAITOK);
        /* with SB_NOINTR and M_WAITOK sblock() must not fail */
        KASSERT(error == 0);
        socantrcvmore(so);
-       sbunlock(sb);
+       sbunlock(so, sb);
        aso.so_proto = pr;
        aso.so_rcv = *sb;
-       memset(sb, 0, sizeof (*sb));
-       /* XXX - the memset stomps all over so_rcv */
-       if (aso.so_rcv.sb_flags & SB_KNOTE) {
-               sb->sb_sel.si_note = aso.so_rcv.sb_sel.si_note;
-               sb->sb_flags = SB_KNOTE;
-       }
+       memset(&sb->sb_startzero, 0,
+           (caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero);
        if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
                (*pr->pr_domain->dom_dispose)(aso.so_rcv.sb_mb);
        sbrelease(&aso, &aso.so_rcv);
@@ -1110,7 +1115,7 @@ sosplice(struct socket *so, int fd, off_
                }
                if (so->so_sp->ssp_socket)
                        sounsplice(so, so->so_sp->ssp_socket, 1);
-               sbunlock(&so->so_rcv);
+               sbunlock(so, &so->so_rcv);
                return (0);
        }
 
@@ -1139,7 +1144,7 @@ sosplice(struct socket *so, int fd, off_
                return (error);
        }
        if ((error = sblock(so, &sosp->so_snd, M_WAITOK)) != 0) {
-               sbunlock(&so->so_rcv);
+               sbunlock(so, &so->so_rcv);
                FRELE(fp, curproc);
                return (error);
        }
@@ -1183,8 +1188,8 @@ sosplice(struct socket *so, int fd, off_
        }
 
  release:
-       sbunlock(&sosp->so_snd);
-       sbunlock(&so->so_rcv);
+       sbunlock(sosp, &sosp->so_snd);
+       sbunlock(so, &so->so_rcv);
        FRELE(fp, curproc);
        return (error);
 }
@@ -1544,7 +1549,8 @@ sorwakeup(struct socket *so)
        if (isspliced(so))
                return;
 #endif
-       sowakeup(so, &so->so_rcv);
+       if ((so->so_state & SS_NOFDREF) == 0)
+               task_add(systq, &so->so_rcv.sb_wtask);
        if (so->so_upcall)
                (*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
 }
@@ -1558,7 +1564,9 @@ sowwakeup(struct socket *so)
        if (so->so_snd.sb_flagsintr & SB_SPLICE)
                task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask);
 #endif
-       sowakeup(so, &so->so_snd);
+
+       if ((so->so_state & SS_NOFDREF) == 0)
+               task_add(systq, &so->so_snd.sb_wtask);
 }
 
 int
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.86
diff -u -p -r1.86 uipc_socket2.c
--- kern/uipc_socket2.c 11 Aug 2017 21:24:19 -0000      1.86
+++ kern/uipc_socket2.c 20 Nov 2017 15:08:39 -0000
@@ -189,6 +189,8 @@ sonewconn(struct socket *head, int conns
        so->so_rcv.sb_wat = head->so_rcv.sb_wat;
        so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
        so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
+       task_set(&so->so_rcv.sb_wtask, sorwakeup_cb, so);
+       task_set(&so->so_snd.sb_wtask, sowwakeup_cb, so);
 
        soqinsque(head, so, soqueue);
        if ((*so->so_proto->pr_attach)(so, 0)) {
@@ -342,7 +344,6 @@ sblock(struct socket *so, struct sockbuf
 {
        int error, prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH;
 
-       KERNEL_ASSERT_LOCKED();
        soassertlocked(so);
 
        if ((sb->sb_flags & SB_LOCK) == 0) {
@@ -363,15 +364,37 @@ sblock(struct socket *so, struct sockbuf
 }
 
 void
-sbunlock(struct sockbuf *sb)
+sbunlock(struct socket *so, struct sockbuf *sb)
 {
-       KERNEL_ASSERT_LOCKED();
+       soassertlocked(so);
 
        sb->sb_flags &= ~SB_LOCK;
        if (sb->sb_flags & SB_WANT) {
                sb->sb_flags &= ~SB_WANT;
                wakeup(&sb->sb_flags);
        }
+}
+
+void
+sorwakeup_cb(void *xso)
+{
+       struct socket *so = xso;
+       int s;
+
+       s = solock(so);
+       sowakeup(so, &so->so_rcv);
+       sounlock(s);
+}
+
+void
+sowwakeup_cb(void *xso)
+{
+       struct socket *so = xso;
+       int s;
+
+       s = solock(so);
+       sowakeup(so, &so->so_snd);
+       sounlock(s);
 }
 
 /*
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.530
diff -u -p -r1.530 if.c
--- net/if.c    20 Nov 2017 10:16:25 -0000      1.530
+++ net/if.c    20 Nov 2017 11:47:43 -0000
@@ -933,7 +933,6 @@ if_netisr(void *unused)
 {
        int n, t = 0;
 
-       KERNEL_LOCK();
        NET_LOCK();
 
        while ((n = netisr) != 0) {
@@ -947,8 +946,11 @@ if_netisr(void *unused)
                atomic_clearbits_int(&netisr, n);
 
 #if NETHER > 0
-               if (n & (1 << NETISR_ARP))
+               if (n & (1 << NETISR_ARP)) {
+                       KERNEL_LOCK();
                        arpintr();
+                       KERNEL_UNLOCK();
+               }
 #endif
                if (n & (1 << NETISR_IP))
                        ipintr();
@@ -957,35 +959,52 @@ if_netisr(void *unused)
                        ip6intr();
 #endif
 #if NPPP > 0
-               if (n & (1 << NETISR_PPP))
+               if (n & (1 << NETISR_PPP)) {
+                       KERNEL_LOCK();
                        pppintr();
+                       KERNEL_UNLOCK();
+               }
 #endif
 #if NBRIDGE > 0
-               if (n & (1 << NETISR_BRIDGE))
+               if (n & (1 << NETISR_BRIDGE)) {
+                       KERNEL_LOCK();
                        bridgeintr();
+                       KERNEL_UNLOCK();
+               }
 #endif
 #if NSWITCH > 0
-               if (n & (1 << NETISR_SWITCH))
+               if (n & (1 << NETISR_SWITCH)) {
+                       KERNEL_LOCK();
                        switchintr();
+                       KERNEL_UNLOCK();
+               }
 #endif
 #if NPPPOE > 0
-               if (n & (1 << NETISR_PPPOE))
+               if (n & (1 << NETISR_PPPOE)) {
+                       KERNEL_LOCK();
                        pppoeintr();
+                       KERNEL_UNLOCK();
+               }
 #endif
 #ifdef PIPEX
-               if (n & (1 << NETISR_PIPEX))
+               if (n & (1 << NETISR_PIPEX)) {
+                       KERNEL_LOCK();
                        pipexintr();
+                       KERNEL_UNLOCK();
+               }
 #endif
                t |= n;
        }
 
 #if NPFSYNC > 0
-       if (t & (1 << NETISR_PFSYNC))
+       if (t & (1 << NETISR_PFSYNC)) {
+               KERNEL_LOCK();
                pfsyncintr();
+               KERNEL_UNLOCK();
+       }
 #endif
 
        NET_UNLOCK();
-       KERNEL_UNLOCK();
 }
 
 void
Index: netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.333
diff -u -p -r1.333 ip_input.c
--- netinet/ip_input.c  20 Nov 2017 10:35:24 -0000      1.333
+++ netinet/ip_input.c  20 Nov 2017 11:49:00 -0000
@@ -619,8 +619,6 @@ ip_deliver(struct mbuf **mp, int *offp, 
        int nest = 0;
 #endif /* INET6 */
 
-       KERNEL_ASSERT_LOCKED();
-
        /* pf might have modified stuff, might have to chksum */
        switch (af) {
        case AF_INET:
Index: sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.77
diff -u -p -r1.77 socketvar.h
--- sys/socketvar.h     4 Nov 2017 14:13:53 -0000       1.77
+++ sys/socketvar.h     20 Nov 2017 15:08:53 -0000
@@ -98,6 +98,8 @@ struct socket {
  * Variables for socket buffering.
  */
        struct  sockbuf {
+/* The following fields are all zeroed on flush. */
+#define        sb_startzero    sb_cc
                u_long  sb_cc;          /* actual chars in buffer */
                u_long  sb_datacc;      /* data only chars in buffer */
                u_long  sb_hiwat;       /* max actual char count */
@@ -109,10 +111,13 @@ struct socket {
                struct mbuf *sb_mbtail; /* the last mbuf in the chain */
                struct mbuf *sb_lastrecord;/* first mbuf of last record in
                                              socket buffer */
+/* End area that is zeroed on flush. */
+#define        sb_endzero      sb_sel
                struct  selinfo sb_sel; /* process selecting read/write */
                int     sb_flagsintr;   /* flags, changed during interrupt */
                short   sb_flags;       /* flags, see below */
                u_short sb_timeo;       /* timeout for read/write */
+               struct  task sb_wtask;  /* delay csignal() and selwakeup() */
        } so_rcv, so_snd;
 #define        SB_MAX          (2*1024*1024)   /* default for max chars in sockbuf */
 #define        SB_LOCK         0x01            /* lock on data queue */
@@ -244,7 +249,7 @@ soreadable(struct socket *so)
 int sblock(struct socket *, struct sockbuf *, int);
 
 /* release lock on sockbuf sb */
-void sbunlock(struct sockbuf *);
+void sbunlock(struct socket *, struct sockbuf *);
 
 #define        SB_EMPTY_FIXUP(sb) do {                                         \
        if ((sb)->sb_mb == NULL) {                                      \
@@ -329,6 +334,8 @@ int sosend(struct socket *so, struct mbu
 int    sosetopt(struct socket *so, int level, int optname, struct mbuf *m);
 int    soshutdown(struct socket *so, int how);
 void   sowakeup(struct socket *so, struct sockbuf *sb);
+void   sorwakeup_cb(void *);
+void   sowwakeup_cb(void *);
 void   sorwakeup(struct socket *);
 void   sowwakeup(struct socket *);
 int    sockargs(struct mbuf **, const void *, size_t, int);

Reply via email to