Author: jch
Date: Mon Aug  3 12:13:54 2015
New Revision: 286227
URL: https://svnweb.freebsd.org/changeset/base/286227
Log:
  Decompose the TCP INP_INFO lock to improve the scalability of short-lived
  TCP connections:
  
  - The existing TCP INP_INFO lock continues to protect the global inpcb list
    stability during full list traversal (e.g. tcp_pcblist()).
  
  - A new INP_LIST lock protects actual modifications of the inpcb list (inp
    allocation and free) and the global inpcb counters.
  
  This allows the TCP INP_INFO lock to be taken as a read lock (INP_INFO_RLOCK)
  in critical paths (e.g. tcp_input()), and as a write lock (INP_INFO_WLOCK)
  only in occasional operations that walk all connections.
  
  PR:                   183659
  Differential Revision:        https://reviews.freebsd.org/D2599
  Reviewed by:          jhb, adrian
  Tested by:            adrian, nitroboost-gmail.com
  Sponsored by:         Verisign, Inc.

Modified:
  head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
  head/sys/dev/cxgb/ulp/tom/cxgb_listen.c
  head/sys/dev/cxgbe/tom/t4_connect.c
  head/sys/dev/cxgbe/tom/t4_cpl_io.c
  head/sys/dev/cxgbe/tom/t4_listen.c
  head/sys/netinet/in_pcb.c
  head/sys/netinet/in_pcb.h
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_syncache.c
  head/sys/netinet/tcp_timer.c
  head/sys/netinet/tcp_timewait.c
  head/sys/netinet/tcp_usrreq.c
  head/sys/netinet/toecore.c
  head/sys/netinet6/in6_pcb.c

Modified: head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
==============================================================================
--- head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c     Mon Aug  3 11:57:11 2015        (r286226)
+++ head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c     Mon Aug  3 12:13:54 2015        (r286227)
@@ -639,7 +639,7 @@ t3_send_fin(struct toedev *tod, struct t
        unsigned int tid = toep->tp_tid;
 #endif
 
-       INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+       INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
        INP_WLOCK_ASSERT(inp);
 
        CTR4(KTR_CXGB, "%s: tid %d, toep %p, flags %x", __func__, tid, toep,
@@ -925,12 +925,12 @@ do_act_open_rpl(struct sge_qset *qs, str
 
        rc = act_open_rpl_status_to_errno(s);
        if (rc != EAGAIN)
-               INP_INFO_WLOCK(&V_tcbinfo);
+               INP_INFO_RLOCK(&V_tcbinfo);
        INP_WLOCK(inp);
        toe_connect_failed(tod, inp, rc);
        toepcb_release(toep);   /* unlocks inp */
        if (rc != EAGAIN)
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
 
        m_freem(m);
        return (0);
@@ -1061,7 +1061,7 @@ send_reset(struct toepcb *toep)
        struct adapter *sc = tod->tod_softc;
        struct mbuf *m;
 
-       INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+       INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
        INP_WLOCK_ASSERT(inp);
 
        CTR4(KTR_CXGB, "%s: tid %d, toep %p (%x)", __func__, tid, toep,
@@ -1172,12 +1172,12 @@ do_rx_data(struct sge_qset *qs, struct r
                SOCKBUF_UNLOCK(so_rcv);
                INP_WUNLOCK(inp);
 
-               INP_INFO_WLOCK(&V_tcbinfo);
+               INP_INFO_RLOCK(&V_tcbinfo);
                INP_WLOCK(inp);
                tp = tcp_drop(tp, ECONNRESET);
                if (tp)
                        INP_WUNLOCK(inp);
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
 
                m_freem(m);
                return (0);
@@ -1222,7 +1222,7 @@ do_peer_close(struct sge_qset *qs, struc
        struct tcpcb *tp;
        struct socket *so;
 
-       INP_INFO_WLOCK(&V_tcbinfo);
+       INP_INFO_RLOCK(&V_tcbinfo);
        INP_WLOCK(inp);
        tp = intotcpcb(inp);
 
@@ -1250,7 +1250,7 @@ do_peer_close(struct sge_qset *qs, struc
        case TCPS_FIN_WAIT_2:
                tcp_twstart(tp);
                INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the  inp */
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
 
                INP_WLOCK(inp);
                toepcb_release(toep);   /* no more CPLs expected */
@@ -1264,7 +1264,7 @@ do_peer_close(struct sge_qset *qs, struc
 
 done:
        INP_WUNLOCK(inp);
-       INP_INFO_WUNLOCK(&V_tcbinfo);
+       INP_INFO_RUNLOCK(&V_tcbinfo);
 
        m_freem(m);
        return (0);
@@ -1285,7 +1285,7 @@ do_close_con_rpl(struct sge_qset *qs, st
        struct tcpcb *tp;
        struct socket *so;
 
-       INP_INFO_WLOCK(&V_tcbinfo);
+       INP_INFO_RLOCK(&V_tcbinfo);
        INP_WLOCK(inp);
        tp = intotcpcb(inp);
 
@@ -1303,7 +1303,7 @@ do_close_con_rpl(struct sge_qset *qs, st
                tcp_twstart(tp);
 release:
                INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the  inp */
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
 
                INP_WLOCK(inp);
                toepcb_release(toep);   /* no more CPLs expected */
@@ -1328,7 +1328,7 @@ release:
 
 done:
        INP_WUNLOCK(inp);
-       INP_INFO_WUNLOCK(&V_tcbinfo);
+       INP_INFO_RUNLOCK(&V_tcbinfo);
 
        m_freem(m);
        return (0);
@@ -1489,7 +1489,7 @@ do_abort_req(struct sge_qset *qs, struct
                return (do_abort_req_synqe(qs, r, m));
 
        inp = toep->tp_inp;
-       INP_INFO_WLOCK(&V_tcbinfo);     /* for tcp_close */
+       INP_INFO_RLOCK(&V_tcbinfo);     /* for tcp_close */
        INP_WLOCK(inp);
 
        tp = intotcpcb(inp);
@@ -1503,7 +1503,7 @@ do_abort_req(struct sge_qset *qs, struct
                toep->tp_flags |= TP_ABORT_REQ_RCVD;
                toep->tp_flags |= TP_ABORT_SHUTDOWN;
                INP_WUNLOCK(inp);
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
                m_freem(m);
                return (0);
        }
@@ -1523,7 +1523,7 @@ do_abort_req(struct sge_qset *qs, struct
                        INP_WLOCK(inp); /* re-acquire */
                toepcb_release(toep);   /* no more CPLs expected */
        }
-       INP_INFO_WUNLOCK(&V_tcbinfo);
+       INP_INFO_RUNLOCK(&V_tcbinfo);
 
        send_abort_rpl(tod, tid, qset);
        m_freem(m);

Modified: head/sys/dev/cxgb/ulp/tom/cxgb_listen.c
==============================================================================
--- head/sys/dev/cxgb/ulp/tom/cxgb_listen.c     Mon Aug  3 11:57:11 2015        (r286226)
+++ head/sys/dev/cxgb/ulp/tom/cxgb_listen.c     Mon Aug  3 12:13:54 2015        (r286227)
@@ -541,11 +541,11 @@ do_pass_accept_req(struct sge_qset *qs, 
                        REJECT_PASS_ACCEPT();   /* no l2te, or ifp mismatch */
        }
 
-       INP_INFO_WLOCK(&V_tcbinfo);
+       INP_INFO_RLOCK(&V_tcbinfo);
 
        /* Don't offload if the 4-tuple is already in use */
        if (toe_4tuple_check(&inc, &th, ifp) != 0) {
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
                REJECT_PASS_ACCEPT();
        }
 
@@ -558,7 +558,7 @@ do_pass_accept_req(struct sge_qset *qs, 
                 * resources tied to this listen context.
                 */
                INP_WUNLOCK(inp);
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
                REJECT_PASS_ACCEPT();
        }
        so = inp->inp_socket;
@@ -686,7 +686,7 @@ do_pass_establish(struct sge_qset *qs, s
        struct toepcb *toep;
        struct socket *so;
        struct listen_ctx *lctx = synqe->lctx;
-       struct inpcb *inp = lctx->inp;
+       struct inpcb *inp = lctx->inp, *new_inp;
        struct tcpopt to;
        struct tcphdr th;
        struct in_conninfo inc;
@@ -700,7 +700,7 @@ do_pass_establish(struct sge_qset *qs, s
        KASSERT(qs->idx == synqe->qset,
            ("%s qset mismatch %d %d", __func__, qs->idx, synqe->qset));
 
-       INP_INFO_WLOCK(&V_tcbinfo);     /* for syncache_expand */
+       INP_INFO_RLOCK(&V_tcbinfo);     /* for syncache_expand */
        INP_WLOCK(inp);
 
        if (__predict_false(inp->inp_flags & INP_DROPPED)) {
@@ -714,7 +714,7 @@ do_pass_establish(struct sge_qset *qs, s
                    ("%s: listen socket dropped but tid %u not aborted.",
                    __func__, tid));
                INP_WUNLOCK(inp);
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
                m_freem(m);
                return (0);
        }
@@ -730,7 +730,7 @@ do_pass_establish(struct sge_qset *qs, s
 reset:
                t3_send_reset_synqe(tod, synqe);
                INP_WUNLOCK(inp);
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
                m_freem(m);
                return (0);
        }
@@ -748,21 +748,23 @@ reset:
                goto reset;
        }
 
-       if (__predict_false(!(synqe->flags & TP_SYNQE_EXPANDED))) {
-               struct inpcb *new_inp = sotoinpcb(so);
+       /* New connection inpcb is already locked by syncache_expand(). */
+       new_inp = sotoinpcb(so);
+       INP_WLOCK_ASSERT(new_inp);
 
-               INP_WLOCK(new_inp);
+       if (__predict_false(!(synqe->flags & TP_SYNQE_EXPANDED))) {
                tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
                t3_offload_socket(tod, synqe, so);
-               INP_WUNLOCK(new_inp);
        }
 
+       INP_WUNLOCK(new_inp);
+
        /* Remove the synq entry and release its reference on the lctx */
        TAILQ_REMOVE(&lctx->synq, synqe, link);
        inp = release_lctx(td, lctx);
        if (inp)
                INP_WUNLOCK(inp);
-       INP_INFO_WUNLOCK(&V_tcbinfo);
+       INP_INFO_RUNLOCK(&V_tcbinfo);
        release_synqe(synqe);
 
        m_freem(m);
@@ -1128,7 +1130,7 @@ t3_offload_socket(struct toedev *tod, vo
        struct cpl_pass_establish *cpl = synqe->cpl;
        struct toepcb *toep = synqe->toep;
 
-       INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
+       INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
        INP_WLOCK_ASSERT(inp);
 
        offload_socket(so, toep);

Modified: head/sys/dev/cxgbe/tom/t4_connect.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_connect.c Mon Aug  3 11:57:11 2015        (r286226)
+++ head/sys/dev/cxgbe/tom/t4_connect.c Mon Aug  3 12:13:54 2015        (r286227)
@@ -189,12 +189,12 @@ act_open_failure_cleanup(struct adapter 
        toep->tid = -1;
 
        if (status != EAGAIN)
-               INP_INFO_WLOCK(&V_tcbinfo);
+               INP_INFO_RLOCK(&V_tcbinfo);
        INP_WLOCK(inp);
        toe_connect_failed(tod, inp, status);
        final_cpl_received(toep);       /* unlocks inp */
        if (status != EAGAIN)
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
 }
 
 static int

Modified: head/sys/dev/cxgbe/tom/t4_cpl_io.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_cpl_io.c  Mon Aug  3 11:57:11 2015        (r286226)
+++ head/sys/dev/cxgbe/tom/t4_cpl_io.c  Mon Aug  3 12:13:54 2015        (r286227)
@@ -1085,7 +1085,7 @@ do_peer_close(struct sge_iq *iq, const s
 
        KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
-       INP_INFO_WLOCK(&V_tcbinfo);
+       INP_INFO_RLOCK(&V_tcbinfo);
        INP_WLOCK(inp);
        tp = intotcpcb(inp);
 
@@ -1127,7 +1127,7 @@ do_peer_close(struct sge_iq *iq, const s
        case TCPS_FIN_WAIT_2:
                tcp_twstart(tp);
                INP_UNLOCK_ASSERT(inp);  /* safe, we have a ref on the inp */
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
 
                INP_WLOCK(inp);
                final_cpl_received(toep);
@@ -1139,7 +1139,7 @@ do_peer_close(struct sge_iq *iq, const s
        }
 done:
        INP_WUNLOCK(inp);
-       INP_INFO_WUNLOCK(&V_tcbinfo);
+       INP_INFO_RUNLOCK(&V_tcbinfo);
        return (0);
 }
 
@@ -1166,7 +1166,7 @@ do_close_con_rpl(struct sge_iq *iq, cons
        KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
        KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
-       INP_INFO_WLOCK(&V_tcbinfo);
+       INP_INFO_RLOCK(&V_tcbinfo);
        INP_WLOCK(inp);
        tp = intotcpcb(inp);
 
@@ -1184,7 +1184,7 @@ do_close_con_rpl(struct sge_iq *iq, cons
                tcp_twstart(tp);
 release:
                INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the  inp */
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
 
                INP_WLOCK(inp);
                final_cpl_received(toep);       /* no more CPLs expected */
@@ -1208,7 +1208,7 @@ release:
        }
 done:
        INP_WUNLOCK(inp);
-       INP_INFO_WUNLOCK(&V_tcbinfo);
+       INP_INFO_RUNLOCK(&V_tcbinfo);
        return (0);
 }
 
@@ -1367,7 +1367,7 @@ do_abort_req(struct sge_iq *iq, const st
        }
 
        inp = toep->inp;
-       INP_INFO_WLOCK(&V_tcbinfo);     /* for tcp_close */
+       INP_INFO_RLOCK(&V_tcbinfo);     /* for tcp_close */
        INP_WLOCK(inp);
 
        tp = intotcpcb(inp);
@@ -1401,7 +1401,7 @@ do_abort_req(struct sge_iq *iq, const st
 
        final_cpl_received(toep);
 done:
-       INP_INFO_WUNLOCK(&V_tcbinfo);
+       INP_INFO_RUNLOCK(&V_tcbinfo);
        send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
        return (0);
 }
@@ -1515,12 +1515,12 @@ do_rx_data(struct sge_iq *iq, const stru
                SOCKBUF_UNLOCK(sb);
                INP_WUNLOCK(inp);
 
-               INP_INFO_WLOCK(&V_tcbinfo);
+               INP_INFO_RLOCK(&V_tcbinfo);
                INP_WLOCK(inp);
                tp = tcp_drop(tp, ECONNRESET);
                if (tp)
                        INP_WUNLOCK(inp);
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
 
                return (0);
        }

Modified: head/sys/dev/cxgbe/tom/t4_listen.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_listen.c  Mon Aug  3 11:57:11 2015        (r286226)
+++ head/sys/dev/cxgbe/tom/t4_listen.c  Mon Aug  3 12:13:54 2015        (r286227)
@@ -930,7 +930,7 @@ t4_offload_socket(struct toedev *tod, vo
        struct cpl_pass_establish *cpl = mtod(synqe->syn, void *);
        struct toepcb *toep = *(struct toepcb **)(cpl + 1);
 
-       INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
+       INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
        INP_WLOCK_ASSERT(inp);
        KASSERT(synqe->flags & TPF_SYNQE,
            ("%s: %p not a synq_entry?", __func__, arg));
@@ -1259,15 +1259,15 @@ do_pass_accept_req(struct sge_iq *iq, co
                REJECT_PASS_ACCEPT();
        rpl = wrtod(wr);
 
-       INP_INFO_WLOCK(&V_tcbinfo);     /* for 4-tuple check */
+       INP_INFO_RLOCK(&V_tcbinfo);     /* for 4-tuple check */
 
        /* Don't offload if the 4-tuple is already in use */
        if (toe_4tuple_check(&inc, &th, ifp) != 0) {
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
                free(wr, M_CXGBE);
                REJECT_PASS_ACCEPT();
        }
-       INP_INFO_WUNLOCK(&V_tcbinfo);
+       INP_INFO_RUNLOCK(&V_tcbinfo);
 
        inp = lctx->inp;                /* listening socket, not owned by TOE */
        INP_WLOCK(inp);
@@ -1441,7 +1441,7 @@ do_pass_establish(struct sge_iq *iq, con
        unsigned int tid = GET_TID(cpl);
        struct synq_entry *synqe = lookup_tid(sc, tid);
        struct listen_ctx *lctx = synqe->lctx;
-       struct inpcb *inp = lctx->inp;
+       struct inpcb *inp = lctx->inp, *new_inp;
        struct socket *so;
        struct tcphdr th;
        struct tcpopt to;
@@ -1459,7 +1459,7 @@ do_pass_establish(struct sge_iq *iq, con
        KASSERT(synqe->flags & TPF_SYNQE,
            ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
 
-       INP_INFO_WLOCK(&V_tcbinfo);     /* for syncache_expand */
+       INP_INFO_RLOCK(&V_tcbinfo);     /* for syncache_expand */
        INP_WLOCK(inp);
 
        CTR6(KTR_CXGBE,
@@ -1475,7 +1475,7 @@ do_pass_establish(struct sge_iq *iq, con
                }
 
                INP_WUNLOCK(inp);
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
                return (0);
        }
 
@@ -1500,7 +1500,7 @@ reset:
                 */
                send_reset_synqe(TOEDEV(ifp), synqe);
                INP_WUNLOCK(inp);
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
                return (0);
        }
        toep->tid = tid;
@@ -1534,6 +1534,10 @@ reset:
                goto reset;
        }
 
+       /* New connection inpcb is already locked by syncache_expand(). */
+       new_inp = sotoinpcb(so);
+       INP_WLOCK_ASSERT(new_inp);
+
        /*
         * This is for the unlikely case where the syncache entry that we added
         * has been evicted from the syncache, but the syncache_expand above
@@ -1544,20 +1548,18 @@ reset:
         * this somewhat defeats the purpose of having a tod_offload_socket :-(
         */
        if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
-               struct inpcb *new_inp = sotoinpcb(so);
-
-               INP_WLOCK(new_inp);
                tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
                t4_offload_socket(TOEDEV(ifp), synqe, so);
-               INP_WUNLOCK(new_inp);
        }
 
+       INP_WUNLOCK(new_inp);
+
        /* Done with the synqe */
        TAILQ_REMOVE(&lctx->synq, synqe, link);
        inp = release_lctx(sc, lctx);
        if (inp != NULL)
                INP_WUNLOCK(inp);
-       INP_INFO_WUNLOCK(&V_tcbinfo);
+       INP_INFO_RUNLOCK(&V_tcbinfo);
        release_synqe(synqe);
 
        return (0);

Modified: head/sys/netinet/in_pcb.c
==============================================================================
--- head/sys/netinet/in_pcb.c   Mon Aug  3 11:57:11 2015        (r286226)
+++ head/sys/netinet/in_pcb.c   Mon Aug  3 12:13:54 2015        (r286227)
@@ -224,6 +224,7 @@ in_pcbinfo_init(struct inpcbinfo *pcbinf
 
        INP_INFO_LOCK_INIT(pcbinfo, name);
        INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash");     /* XXXRW: argument? */
+       INP_LIST_LOCK_INIT(pcbinfo, "pcbinfolist");
 #ifdef VIMAGE
        pcbinfo->ipi_vnet = curvnet;
 #endif
@@ -262,6 +263,7 @@ in_pcbinfo_destroy(struct inpcbinfo *pcb
        in_pcbgroup_destroy(pcbinfo);
 #endif
        uma_zdestroy(pcbinfo->ipi_zone);
+       INP_LIST_LOCK_DESTROY(pcbinfo);
        INP_HASH_LOCK_DESTROY(pcbinfo);
        INP_INFO_LOCK_DESTROY(pcbinfo);
 }
@@ -276,7 +278,14 @@ in_pcballoc(struct socket *so, struct in
        struct inpcb *inp;
        int error;
 
-       INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+       if (pcbinfo == &V_tcbinfo) {
+               INP_INFO_RLOCK_ASSERT(pcbinfo);
+       } else {
+               INP_INFO_WLOCK_ASSERT(pcbinfo);
+       }
+#endif
+
        error = 0;
        inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
        if (inp == NULL)
@@ -308,6 +317,8 @@ in_pcballoc(struct socket *so, struct in
                        inp->inp_flags |= IN6P_IPV6_V6ONLY;
        }
 #endif
+       INP_WLOCK(inp);
+       INP_LIST_WLOCK(pcbinfo);
        LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
        pcbinfo->ipi_count++;
        so->so_pcb = (caddr_t)inp;
@@ -315,9 +326,9 @@ in_pcballoc(struct socket *so, struct in
        if (V_ip6_auto_flowlabel)
                inp->inp_flags |= IN6P_AUTOFLOWLABEL;
 #endif
-       INP_WLOCK(inp);
        inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
        refcount_init(&inp->inp_refcount, 1);   /* Reference from inpcbinfo */
+       INP_LIST_WUNLOCK(pcbinfo);
 #if defined(IPSEC) || defined(MAC)
 out:
        if (error != 0) {
@@ -1246,7 +1257,13 @@ in_pcbfree(struct inpcb *inp)
 
        KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
 
-       INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+       if (pcbinfo == &V_tcbinfo) {
+               INP_INFO_RLOCK_ASSERT(pcbinfo);
+       } else {
+               INP_INFO_WLOCK_ASSERT(pcbinfo);
+       }
+#endif
        INP_WLOCK_ASSERT(inp);
 
        /* XXXRW: Do as much as possible here. */
@@ -1254,8 +1271,10 @@ in_pcbfree(struct inpcb *inp)
        if (inp->inp_sp != NULL)
                ipsec_delete_pcbpolicy(inp);
 #endif
+       INP_LIST_WLOCK(pcbinfo);
        inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
        in_pcbremlists(inp);
+       INP_LIST_WUNLOCK(pcbinfo);
 #ifdef INET6
        if (inp->inp_vflag & INP_IPV6PROTO) {
                ip6_freepcbopts(inp->in6p_outputopts);
@@ -1412,7 +1431,7 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo
        struct ip_moptions *imo;
        int i, gap;
 
-       INP_INFO_RLOCK(pcbinfo);
+       INP_INFO_WLOCK(pcbinfo);
        LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
                INP_WLOCK(inp);
                imo = inp->inp_moptions;
@@ -1442,7 +1461,7 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo
                }
                INP_WUNLOCK(inp);
        }
-       INP_INFO_RUNLOCK(pcbinfo);
+       INP_INFO_WUNLOCK(pcbinfo);
 }
 
 /*
@@ -2163,8 +2182,16 @@ in_pcbremlists(struct inpcb *inp)
 {
        struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 
-       INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+       if (pcbinfo == &V_tcbinfo) {
+               INP_INFO_RLOCK_ASSERT(pcbinfo);
+       } else {
+               INP_INFO_WLOCK_ASSERT(pcbinfo);
+       }
+#endif
+
        INP_WLOCK_ASSERT(inp);
+       INP_LIST_WLOCK_ASSERT(pcbinfo);
 
        inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
        if (inp->inp_flags & INP_INHASHLIST) {
@@ -2309,13 +2336,13 @@ inp_apply_all(void (*func)(struct inpcb 
 {
        struct inpcb *inp;
 
-       INP_INFO_RLOCK(&V_tcbinfo);
+       INP_INFO_WLOCK(&V_tcbinfo);
        LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
                INP_WLOCK(inp);
                func(inp, arg);
                INP_WUNLOCK(inp);
        }
-       INP_INFO_RUNLOCK(&V_tcbinfo);
+       INP_INFO_WUNLOCK(&V_tcbinfo);
 }
 
 struct socket *

Modified: head/sys/netinet/in_pcb.h
==============================================================================
--- head/sys/netinet/in_pcb.h   Mon Aug  3 11:57:11 2015        (r286226)
+++ head/sys/netinet/in_pcb.h   Mon Aug  3 12:13:54 2015        (r286227)
@@ -130,23 +130,35 @@ struct in_conninfo {
 struct icmp6_filter;
 
 /*-
- * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4
- * and IPv6 sockets.  In the case of TCP, further per-connection state is
+ * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and
+ * IPv6 sockets.  In the case of TCP and UDP, further per-connection state is
  * hung off of inp_ppcb most of the time.  Almost all fields of struct inpcb
  * are static after creation or protected by a per-inpcb rwlock, inp_lock.  A
- * few fields also require the global pcbinfo lock for the inpcb to be held,
- * when modified, such as the global connection lists and hashes, as well as
- * binding information (which affects which hash a connection is on).  This
- * model means that connections can be looked up without holding the
- * per-connection lock, which is important for performance when attempting to
- * find the connection for a packet given its IP and port tuple.  Writing to
- * these fields that write locks be held on both the inpcb and global locks.
+ * few fields are protected by multiple locks as indicated in the locking notes
+ * below.  For these fields, all of the listed locks must be write-locked for
+ * any modifications.  However, these fields can be safely read while any one of
+ * the listed locks are read-locked.  This model can permit greater concurrency
+ * for read operations.  For example, connections can be looked up while only
+ * holding a read lock on the global pcblist lock.  This is important for
+ * performance when attempting to find the connection for a packet given its IP
+ * and port tuple.
+ *
+ * One noteworthy exception is that the global pcbinfo lock follows a different
+ * set of rules in relation to the inp_list field.  Rather than being
+ * write-locked for modifications and read-locked for list iterations, it must
+ * be read-locked during modifications and write-locked during list iterations.
+ * This ensures that the relatively rare global list iterations safely walk a
+ * stable snapshot of connections while allowing more common list modifications
+ * to safely grab the pcblist lock just while adding or removing a connection
+ * from the global list.
  *
  * Key:
  * (c) - Constant after initialization
  * (g) - Protected by the pcbgroup lock
  * (i) - Protected by the inpcb lock
  * (p) - Protected by the pcbinfo lock for the inpcb
+ * (l) - Protected by the pcblist lock for the inpcb
+ * (h) - Protected by the pcbhash lock for the inpcb
  * (s) - Protected by another subsystem's locks
  * (x) - Undefined locking
  *
@@ -161,15 +173,21 @@ struct    icmp6_filter;
  * socket has been freed), or there may be close(2)-related races.
  *
  * The inp_vflag field is overloaded, and would otherwise ideally be (c).
+ *
+ * TODO:  Currently only the TCP stack is leveraging the global pcbinfo lock
+ * read-lock usage during modification, this model can be applied to other
+ * protocols (especially SCTP).
  */
 struct inpcb {
-       LIST_ENTRY(inpcb) inp_hash;     /* (i/p) hash list */
+       LIST_ENTRY(inpcb) inp_hash;     /* (h/i) hash list */
        LIST_ENTRY(inpcb) inp_pcbgrouphash;     /* (g/i) hash list */
-       LIST_ENTRY(inpcb) inp_list;     /* (i/p) list for all PCBs for proto */
+       LIST_ENTRY(inpcb) inp_list;     /* (p/l) list for all PCBs for proto */
+                                       /* (p[w]) for list iteration */
+                                       /* (p[r]/l) for addition/removal */
        void    *inp_ppcb;              /* (i) pointer to per-protocol pcb */
        struct  inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
        struct  inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
-       LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */
+       LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */
        struct  socket *inp_socket;     /* (i) back pointer to socket */
        struct  ucred   *inp_cred;      /* (c) cache of socket cred */
        u_int32_t inp_flow;             /* (i) IPv6 flow information */
@@ -188,7 +206,7 @@ struct inpcb {
                                         *     general use */
 
        /* Local and foreign ports, local and foreign addr. */
-       struct  in_conninfo inp_inc;    /* (i/p) list for PCB's local port */
+       struct  in_conninfo inp_inc;    /* (i) list for PCB's local port */
 
        /* MAC and IPSEC policy information. */
        struct  label *inp_label;       /* (i) MAC label */
@@ -213,8 +231,8 @@ struct inpcb {
                int     inp6_cksum;
                short   inp6_hops;
        } inp_depend6;
-       LIST_ENTRY(inpcb) inp_portlist; /* (i/p) */
-       struct  inpcbport *inp_phd;     /* (i/p) head of this list */
+       LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */
+       struct  inpcbport *inp_phd;     /* (i/h) head of this list */
 #define inp_zero_size offsetof(struct inpcb, inp_gencnt)
        inp_gen_t       inp_gencnt;     /* (c) generation count */
        struct llentry  *inp_lle;       /* cached L2 information */
@@ -279,37 +297,46 @@ struct inpcbport {
  * Global data structure for each high-level protocol (UDP, TCP, ...) in both
  * IPv4 and IPv6.  Holds inpcb lists and information for managing them.
  *
- * Each pcbinfo is protected by two locks: ipi_lock and ipi_hash_lock,
- * the former covering mutable global fields (such as the global pcb list),
- * and the latter covering the hashed lookup tables.  The lock order is:
+ * Each pcbinfo is protected by three locks: ipi_lock, ipi_hash_lock and
+ * ipi_list_lock:
+ *  - ipi_lock covering the global pcb list stability during loop iteration,
+ *  - ipi_hash_lock covering the hashed lookup tables,
+ *  - ipi_list_lock covering mutable global fields (such as the global
+ *    pcb list)
+ *
+ * The lock order is:
  *
- *    ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks}
+ *    ipi_lock (before)
+ *        inpcb locks (before)
+ *            ipi_list locks (before)
+ *                {ipi_hash_lock, pcbgroup locks}
  *
  * Locking key:
  *
  * (c) Constant or nearly constant after initialisation
  * (g) Locked by ipi_lock
+ * (l) Locked by ipi_list_lock
  * (h) Read using either ipi_hash_lock or inpcb lock; write requires both
  * (p) Protected by one or more pcbgroup locks
  * (x) Synchronisation properties poorly defined
  */
 struct inpcbinfo {
        /*
-        * Global lock protecting global inpcb list, inpcb count, etc.
+        * Global lock protecting full inpcb list traversal
         */
        struct rwlock            ipi_lock;
 
        /*
         * Global list of inpcbs on the protocol.
         */
-       struct inpcbhead        *ipi_listhead;          /* (g) */
-       u_int                    ipi_count;             /* (g) */
+       struct inpcbhead        *ipi_listhead;          /* (g/l) */
+       u_int                    ipi_count;             /* (l) */
 
        /*
         * Generation count -- incremented each time a connection is allocated
         * or freed.
         */
-       u_quad_t                 ipi_gencnt;            /* (g) */
+       u_quad_t                 ipi_gencnt;            /* (l) */
 
        /*
         * Fields associated with port lookup and allocation.
@@ -367,6 +394,11 @@ struct inpcbinfo {
         * general use 2
         */
        void                    *ipi_pspare[2];
+
+       /*
+        * Global lock protecting global inpcb list, inpcb count, etc.
+        */
+       struct rwlock            ipi_list_lock;
 };
 
 #ifdef _KERNEL
@@ -466,6 +498,25 @@ short      inp_so_options(const struct inpcb 
 #define INP_INFO_WLOCK_ASSERT(ipi)     rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
 #define INP_INFO_UNLOCK_ASSERT(ipi)    rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
 
+#define INP_LIST_LOCK_INIT(ipi, d) \
+        rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
+#define INP_LIST_LOCK_DESTROY(ipi)  rw_destroy(&(ipi)->ipi_list_lock)
+#define INP_LIST_RLOCK(ipi)     rw_rlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_WLOCK(ipi)     rw_wlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_UPGRADE(ipi)       rw_try_upgrade(&(ipi)->ipi_list_lock)
+#define INP_LIST_RUNLOCK(ipi)   rw_runlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_WUNLOCK(ipi)   rw_wunlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_LOCK_ASSERT(ipi) \
+       rw_assert(&(ipi)->ipi_list_lock, RA_LOCKED)
+#define INP_LIST_RLOCK_ASSERT(ipi) \
+       rw_assert(&(ipi)->ipi_list_lock, RA_RLOCKED)
+#define INP_LIST_WLOCK_ASSERT(ipi) \
+       rw_assert(&(ipi)->ipi_list_lock, RA_WLOCKED)
+#define INP_LIST_UNLOCK_ASSERT(ipi) \
+       rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED)
+
 #define        INP_HASH_LOCK_INIT(ipi, d) \
        rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
 #define        INP_HASH_LOCK_DESTROY(ipi)      rw_destroy(&(ipi)->ipi_hash_lock)

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c        Mon Aug  3 11:57:11 2015        (r286226)
+++ head/sys/netinet/tcp_input.c        Mon Aug  3 12:13:54 2015        (r286227)
@@ -608,7 +608,7 @@ tcp_input(struct mbuf **mp, int *offp, i
        char *s = NULL;                 /* address and port logging */
        int ti_locked;
 #define        TI_UNLOCKED     1
-#define        TI_WLOCKED      2
+#define        TI_RLOCKED      2
 
 #ifdef TCPDEBUG
        /*
@@ -797,8 +797,8 @@ tcp_input(struct mbuf **mp, int *offp, i
         * connection in TIMEWAIT and SYNs not targeting a listening socket.
         */
        if ((thflags & (TH_FIN | TH_RST)) != 0) {
-               INP_INFO_WLOCK(&V_tcbinfo);
-               ti_locked = TI_WLOCKED;
+               INP_INFO_RLOCK(&V_tcbinfo);
+               ti_locked = TI_RLOCKED;
        } else
                ti_locked = TI_UNLOCKED;
 
@@ -820,8 +820,8 @@ tcp_input(struct mbuf **mp, int *offp, i
 
 findpcb:
 #ifdef INVARIANTS
-       if (ti_locked == TI_WLOCKED) {
-               INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+       if (ti_locked == TI_RLOCKED) {
+               INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
        } else {
                INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
        }
@@ -969,20 +969,20 @@ findpcb:
 relocked:
        if (inp->inp_flags & INP_TIMEWAIT) {
                if (ti_locked == TI_UNLOCKED) {
-                       if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
+                       if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
                                in_pcbref(inp);
                                INP_WUNLOCK(inp);
-                               INP_INFO_WLOCK(&V_tcbinfo);
-                               ti_locked = TI_WLOCKED;
+                               INP_INFO_RLOCK(&V_tcbinfo);
+                               ti_locked = TI_RLOCKED;
                                INP_WLOCK(inp);
                                if (in_pcbrele_wlocked(inp)) {
                                        inp = NULL;
                                        goto findpcb;
                                }
                        } else
-                               ti_locked = TI_WLOCKED;
+                               ti_locked = TI_RLOCKED;
                }
-               INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+               INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
                if (thflags & TH_SYN)
                        tcp_dooptions(&to, optp, optlen, TO_SYN);
@@ -991,7 +991,7 @@ relocked:
                 */
                if (tcp_twcheck(inp, &to, th, m, tlen))
                        goto findpcb;
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+               INP_INFO_RUNLOCK(&V_tcbinfo);
                return (IPPROTO_DONE);
        }
        /*
@@ -1022,16 +1022,16 @@ relocked:
         */
 #ifdef INVARIANTS
        if ((thflags & (TH_FIN | TH_RST)) != 0)
-               INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+               INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 #endif
        if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
            (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) {
                if (ti_locked == TI_UNLOCKED) {
-                       if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
+                       if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
                                in_pcbref(inp);
                                INP_WUNLOCK(inp);
-                               INP_INFO_WLOCK(&V_tcbinfo);
-                               ti_locked = TI_WLOCKED;
+                               INP_INFO_RLOCK(&V_tcbinfo);
+                               ti_locked = TI_RLOCKED;
                                INP_WLOCK(inp);
                                if (in_pcbrele_wlocked(inp)) {
                                        inp = NULL;
@@ -1039,9 +1039,9 @@ relocked:
                                }
                                goto relocked;
                        } else
-                               ti_locked = TI_WLOCKED;
+                               ti_locked = TI_RLOCKED;
                }
-               INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+               INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
        }
 
 #ifdef MAC
@@ -1096,7 +1096,7 @@ relocked:
                 */
                if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
 
-                       INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+                       INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
                        /*
                         * Parse the TCP options here because
                         * syncookies need access to the reflected
@@ -1148,7 +1148,11 @@ relocked:
                         */
                        INP_WUNLOCK(inp);       /* listen socket */
                        inp = sotoinpcb(so);
-                       INP_WLOCK(inp);         /* new connection */
+                       /*
+                        * New connection inpcb is already locked by
+                        * syncache_expand().
+                        */
+                       INP_WLOCK_ASSERT(inp);
                        tp = intotcpcb(inp);
                        KASSERT(tp->t_state == TCPS_SYN_RECEIVED,
                            ("%s: ", __func__));
@@ -1379,8 +1383,8 @@ relocked:
                 * Entry added to syncache and mbuf consumed.
                 * Only the listen socket is unlocked by syncache_add().
                 */
-               if (ti_locked == TI_WLOCKED) {
-                       INP_INFO_WUNLOCK(&V_tcbinfo);
+               if (ti_locked == TI_RLOCKED) {
+                       INP_INFO_RUNLOCK(&V_tcbinfo);
                        ti_locked = TI_UNLOCKED;
                }
                INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
@@ -1429,8 +1433,8 @@ relocked:
 dropwithreset:
        TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
 
-       if (ti_locked == TI_WLOCKED) {
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+       if (ti_locked == TI_RLOCKED) {
+               INP_INFO_RUNLOCK(&V_tcbinfo);
                ti_locked = TI_UNLOCKED;
        }
 #ifdef INVARIANTS
@@ -1453,8 +1457,8 @@ dropunlock:
        if (m != NULL)
                TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
 
-       if (ti_locked == TI_WLOCKED) {
-               INP_INFO_WUNLOCK(&V_tcbinfo);
+       if (ti_locked == TI_RLOCKED) {
+               INP_INFO_RUNLOCK(&V_tcbinfo);
                ti_locked = TI_UNLOCKED;
        }
 #ifdef INVARIANTS
@@ -1511,13 +1515,13 @@ tcp_do_segment(struct mbuf *m, struct tc
         */
        if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
            tp->t_state != TCPS_ESTABLISHED) {
-               KASSERT(ti_locked == TI_WLOCKED, ("%s ti_locked %d for "
+               KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
                    "SYN/FIN/RST/!EST", __func__, ti_locked));
-               INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+               INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
        } else {
 #ifdef INVARIANTS
-               if (ti_locked == TI_WLOCKED)
-                       INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+               if (ti_locked == TI_RLOCKED)
+                       INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
                else {
                        KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
                            "ti_locked: %d", __func__, ti_locked));
@@ -1690,8 +1694,8 @@ tcp_do_segment(struct mbuf *m, struct tc
                                /*
                                 * This is a pure ack for outstanding data.
                                 */
-                               if (ti_locked == TI_WLOCKED)
-                                       INP_INFO_WUNLOCK(&V_tcbinfo);
+                               if (ti_locked == TI_RLOCKED)
+                                       INP_INFO_RUNLOCK(&V_tcbinfo);
                                ti_locked = TI_UNLOCKED;
 
                                TCPSTAT_INC(tcps_predack);
@@ -1794,8 +1798,8 @@ tcp_do_segment(struct mbuf *m, struct tc
                         * nothing on the reassembly queue and we have enough
                         * buffer space to take it.
                         */
-                       if (ti_locked == TI_WLOCKED)
-                               INP_INFO_WUNLOCK(&V_tcbinfo);
+                       if (ti_locked == TI_RLOCKED)
+                               INP_INFO_RUNLOCK(&V_tcbinfo);
                        ti_locked = TI_UNLOCKED;
 
                        /* Clean receiver SACK report if present */
@@ -2031,9 +2035,9 @@ tcp_do_segment(struct mbuf *m, struct tc
                        tcp_state_change(tp, TCPS_SYN_RECEIVED);
                }
 
-               KASSERT(ti_locked == TI_WLOCKED, ("%s: trimthenstep6: "
+               KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
                    "ti_locked %d", __func__, ti_locked));
-               INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+               INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
                INP_WLOCK_ASSERT(tp->t_inpcb);
 
                /*
@@ -2106,8 +2110,8 @@ tcp_do_segment(struct mbuf *m, struct tc
                    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
                    (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
 
-                       INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
-                       KASSERT(ti_locked == TI_WLOCKED,
+                       INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+                       KASSERT(ti_locked == TI_RLOCKED,
                            ("%s: TH_RST ti_locked %d, th %p tp %p",
                            __func__, ti_locked, th, tp));
                        KASSERT(tp->t_state != TCPS_SYN_SENT,
@@ -2150,9 +2154,9 @@ tcp_do_segment(struct mbuf *m, struct tc
         * Send challenge ACK for any SYN in synchronized state.
         */
        if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) {
-               KASSERT(ti_locked == TI_WLOCKED,
+               KASSERT(ti_locked == TI_RLOCKED,
                    ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
-               INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+               INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
                TCPSTAT_INC(tcps_badsyn);
                if (V_tcp_insecure_syn &&
@@ -2265,9 +2269,9 @@ tcp_do_segment(struct mbuf *m, struct tc
         */
        if ((so->so_state & SS_NOFDREF) &&
            tp->t_state > TCPS_CLOSE_WAIT && tlen) {
-               KASSERT(ti_locked == TI_WLOCKED, ("%s: SS_NOFDEREF && "
+               KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && "
                    "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
-               INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+               INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
                if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
                        log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
@@ -2768,9 +2772,9 @@ process_ACK:
                 */
                case TCPS_CLOSING:

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to