I've found one of the causes of the network instability of FreeBSD 7: the TCP syncache fails to retransmit SYN-ACK packets. This causes interesting problems when packet loss is experienced during connection setup. The symptoms that I have witnessed are twofold:

1. If the third part of the three-way handshake (3WHS) is lost, the client will believe that the connection is in the ESTABLISHED state, while the server will still have the connection in the syncache.
2. Subsequently, the above syncache entry will stay stuck in the syncache forever. If you attempt to re-use that same 4-tuple, the syncache will ACK the new SYN with the old sequence number.

Anyway, the attached patch simplifies the syncache structure a bit and makes it retransmit properly. I'd appreciate testing from anyone who has experienced TCP problems with FreeBSD 7, as well as anyone who is pushing significant traffic through FreeBSD 7.

I'm not interested in FreeBSD 6 testers, since the FreeBSD 6 syncache has a different structure and is not affected by this bug.

FWIW, here's how to prove the existence of the bug. Install nemesis from ports, then use it to send SYN packets at your FreeBSD 7 machine. Without the patch, you should see only one SYN-ACK reply, and you should also notice that the sysctl net.inet.tcp.syncache.count goes up, but does not come back down.

Once you have applied the patch, you should see the behavior demonstrated below:

From your client machine (nemesis will pick an IP to spoof; change that if you wish):
nemesis tcp -y 80 -D 10.1.1.6

TCP Packet Injected

On your FreeBSD 7 machine:

patrocles# tcpdump -n port 80
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on nve0, link-type EN10MB (Ethernet), capture size 96 bytes
23:49:02.075118 IP 133.120.85.92.48922 > 10.1.1.6.80: S 1519649939:1519649939(0) win 4096
23:49:02.075165 IP 10.1.1.6.80 > 133.120.85.92.48922: S 269601671:269601671(0) ack 1519649940 win 65535 <mss 1460>
23:49:05.164195 IP 10.1.1.6.80 > 133.120.85.92.48922: S 269601671:269601671(0) ack 1519649940 win 65535 <mss 1460>
23:49:11.264245 IP 10.1.1.6.80 > 133.120.85.92.48922: S 269601671:269601671(0) ack 1519649940 win 65535 <mss 1460>
23:49:23.364342 IP 10.1.1.6.80 > 133.120.85.92.48922: S 269601671:269601671(0) ack 1519649940 win 65535 <mss 1460>

Thanks,

Mike "Silby" Silbersack
--- /usr/src/sys.old/netinet/tcp_syncache.c     2007-06-24 20:17:31.000000000 -0500
+++ /usr/src/sys/netinet/tcp_syncache.c 2007-07-09 00:46:18.000000000 -0500
@@ -149,7 +150,6 @@
        struct mtx      sch_mtx;
        TAILQ_HEAD(sch_head, syncache)  sch_bucket;
        struct callout  sch_timer;
-       int             sch_nextc;
        u_int           sch_length;
        u_int           sch_oddeven;
        u_int32_t       sch_secbits_odd[SYNCOOKIE_SECRET_SIZE];
@@ -240,16 +240,10 @@
 
 #define ENDPTS6_EQ(a, b) (memcmp(a, b, sizeof(*a)) == 0)
 
-#define SYNCACHE_TIMEOUT(sc, sch, co) do {                             \
+#define SYNCACHE_TIMEOUT(sc) do {                                      \
        (sc)->sc_rxmits++;                                              \
        (sc)->sc_rxttime = ticks +                                      \
                TCPTV_RTOBASE * tcp_backoff[(sc)->sc_rxmits - 1];       \
-       if ((sch)->sch_nextc > (sc)->sc_rxttime)                        \
-               (sch)->sch_nextc = (sc)->sc_rxttime;                    \
-       if (!TAILQ_EMPTY(&(sch)->sch_bucket) && !(co))                  \
-               callout_reset(&(sch)->sch_timer,                        \
-                       (sch)->sch_nextc - ticks,                       \
-                       syncache_timer, (void *)(sch));                 \
 } while (0)
 
 #define        SCH_LOCK(sch)           mtx_lock(&(sch)->sch_mtx)
@@ -275,6 +269,7 @@
 syncache_init(void)
 {
        int i;
+       struct syncache_head *sch;
 
        tcp_syncache.cache_count = 0;
        tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
@@ -317,6 +312,17 @@
        tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        uma_zone_set_max(tcp_syncache.zone, tcp_syncache.cache_limit);
+
+       /*
+        * Start the syncache head timers running.  They each run ten times
+        * a second, and are spread out so that they are not all running on
+        * the same clock tick.
+        */
+       for (i = 0; i < tcp_syncache.hashsize; i++) {
+               sch = &tcp_syncache.hashbase[i];
+               callout_reset(&(sch)->sch_timer, i * (hz / 10),    
+                syncache_timer, (void *)(sch));
+       }
 }
 
 /*
@@ -346,8 +352,8 @@
        TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
        sch->sch_length++;
 
-       /* Reinitialize the bucket row's timer. */
-       SYNCACHE_TIMEOUT(sc, sch, 1);
+       /* Set the retransmit timer for this socket. */
+       SYNCACHE_TIMEOUT(sc);
 
        SCH_UNLOCK(sch);
 
@@ -398,8 +404,6 @@
                 * host does the SYN/ACK->ACK.
                 */
                if (sc->sc_rxttime >= tick) {
-                       if (sc->sc_rxttime < sch->sch_nextc)
-                               sch->sch_nextc = sc->sc_rxttime;
                        continue;
                }
 
@@ -416,11 +420,10 @@
 
                (void) syncache_respond(sc);
                tcpstat.tcps_sc_retransmitted++;
-               SYNCACHE_TIMEOUT(sc, sch, 0);
+               SYNCACHE_TIMEOUT(sc);
        }
-       if (!TAILQ_EMPTY(&(sch)->sch_bucket))
-               callout_reset(&(sch)->sch_timer, (sch)->sch_nextc - tick,
-                       syncache_timer, (void *)(sch));
+       callout_reset(&(sch)->sch_timer, hz / 10,
+               syncache_timer, (void *)(sch));
 }
 
 /*
@@ -1007,7 +1010,7 @@
                    ("%s: label not initialized", __func__));
 #endif
                if (syncache_respond(sc) == 0) {
-                       SYNCACHE_TIMEOUT(sc, sch, 1);
+                       SYNCACHE_TIMEOUT(sc);
                        tcpstat.tcps_sndacks++;
                        tcpstat.tcps_sndtotal++;
                }
_______________________________________________
freebsd-net@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "[EMAIL PROTECTED]"

Reply via email to