Author: jhb
Date: Thu Sep  4 19:09:08 2014
New Revision: 271119
URL: http://svnweb.freebsd.org/changeset/base/271119

Log:
  In tcp_input(), don't acquire the pcbinfo global write lock for SYN
  packets targeting a listening socket.  This reduces TCP input
  processing starvation under high SYN load (e.g. short-lived TCP
  connections or a SYN flood).
  
  Submitted by: Julien Charbon <jchar...@verisign.com>
  Reviewed by:  adrian, hiren, jhb, Mike Bentkofsky

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_syncache.c

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c        Thu Sep  4 18:54:01 2014        (r271118)
+++ head/sys/netinet/tcp_input.c        Thu Sep  4 19:09:08 2014        (r271119)
@@ -748,12 +748,12 @@ tcp_input(struct mbuf **mp, int *offp, i
 
        /*
         * Locate pcb for segment; if we're likely to add or remove a
-        * connection then first acquire pcbinfo lock.  There are two cases
+        * connection then first acquire pcbinfo lock.  There are three cases
         * where we might discover later we need a write lock despite the
-        * flags: ACKs moving a connection out of the syncache, and ACKs for
-        * a connection in TIMEWAIT.
+        * flags: ACKs moving a connection out of the syncache, ACKs for a
+        * connection in TIMEWAIT and SYNs not targeting a listening socket.
         */
-       if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0) {
+       if ((thflags & (TH_FIN | TH_RST)) != 0) {
                INP_INFO_WLOCK(&V_tcbinfo);
                ti_locked = TI_WLOCKED;
        } else
@@ -982,10 +982,11 @@ relocked:
         * now be in TIMEWAIT.
         */
 #ifdef INVARIANTS
-       if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0)
+       if ((thflags & (TH_FIN | TH_RST)) != 0)
                INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
 #endif
-       if (tp->t_state != TCPS_ESTABLISHED) {
+       if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
+           (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) {
                if (ti_locked == TI_UNLOCKED) {
                        if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
                                in_pcbref(inp);
@@ -1026,17 +1027,13 @@ relocked:
        /*
         * When the socket is accepting connections (the INPCB is in LISTEN
         * state) we look into the SYN cache if this is a new connection
-        * attempt or the completion of a previous one.  Because listen
-        * sockets are never in TCPS_ESTABLISHED, the V_tcbinfo lock will be
-        * held in this case.
+        * attempt or the completion of a previous one.
         */
        if (so->so_options & SO_ACCEPTCONN) {
                struct in_conninfo inc;
 
                KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
                    "tp not listening", __func__));
-               INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
-
                bzero(&inc, sizeof(inc));
 #ifdef INET6
                if (isipv6) {
@@ -1059,6 +1056,8 @@ relocked:
                 * socket appended to the listen queue in SYN_RECEIVED state.
                 */
                if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
+
+                       INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
                        /*
                         * Parse the TCP options here because
                         * syncookies need access to the reflected
@@ -1339,8 +1338,12 @@ relocked:
                syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
                /*
                 * Entry added to syncache and mbuf consumed.
-                * Everything already unlocked by syncache_add().
+                * Only the listen socket is unlocked by syncache_add().
                 */
+               if (ti_locked == TI_WLOCKED) {
+                       INP_INFO_WUNLOCK(&V_tcbinfo);
+                       ti_locked = TI_UNLOCKED;
+               }
                INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
                return (IPPROTO_DONE);
        } else if (tp->t_state == TCPS_LISTEN) {

Modified: head/sys/netinet/tcp_syncache.c
==============================================================================
--- head/sys/netinet/tcp_syncache.c     Thu Sep  4 18:54:01 2014        (r271118)
+++ head/sys/netinet/tcp_syncache.c     Thu Sep  4 19:09:08 2014        (r271119)
@@ -1118,7 +1118,6 @@ syncache_add(struct in_conninfo *inc, st
        struct syncache scs;
        struct ucred *cred;
 
-       INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
        INP_WLOCK_ASSERT(inp);                  /* listen socket */
        KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
            ("%s: unexpected tcp flags", __func__));
@@ -1149,13 +1148,11 @@ syncache_add(struct in_conninfo *inc, st
 #ifdef MAC
        if (mac_syncache_init(&maclabel) != 0) {
                INP_WUNLOCK(inp);
-               INP_INFO_WUNLOCK(&V_tcbinfo);
                goto done;
        } else
                mac_syncache_create(maclabel, inp);
 #endif
        INP_WUNLOCK(inp);
-       INP_INFO_WUNLOCK(&V_tcbinfo);
 
        /*
         * Remember the IP options, if any.
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to