Rui Paulo wrote:
On 17 Nov 2008, at 22:40, Andre Oppermann wrote:
This is a bit more complicated because of interactions with tcp_input()
where syncache_expand() is called from.

The old code (as of December 2002) behaved slightly different.  It would
not remove the syncache entry when (SND.UNA == SEG.ACK) but send a RST.
The (RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND) test wasn't done at
all. Instead a socket was opened whenever (SND.UNA == SEG.ACK) succeeded.
This gave way to the "LAND" DoS attack which was mostly fixed with a test
for (RCV.IRS < SEG.SEQ).

See the attached patch for fixed version of syncache_expand(). This patch is untested though. My development machine is currently down. Harti, Rui
and Bjoern, please have a look at the patch and review it.

--Andre

--- tcp_input.c.1.390    Mon Nov 17 21:33:25 2008
+++ tcp_input.c.1.390.mod    Mon Nov 17 21:35:22 2008
@@ -642,10 +642,13 @@ findpcb:
            if (!syncache_expand(&inc, &to, th, &so, m)) {
                /*
                 * No syncache entry or ACK was not
-                 * for our SYN/ACK.  Send a RST.
+                 * for our SYN/ACK.  Send a RST or
+                 * an ACK for re-synchronization.
                 * NB: syncache did its own logging
                 * of the failure cause.
                 */
+                if (so == 1)
+                    goto dropunlock;
                rstreason = BANDLIM_RST_OPENPORT;
                goto dropwithreset;
            }
--- tcp_syncache.c.1.160    Mon Nov 17 16:49:01 2008
+++ tcp_syncache.c.1.160.mod    Mon Nov 17 23:35:39 2008
@@ -817,59 +817,47 @@ syncache_expand(struct in_conninfo *inc,
    INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
    KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
        ("%s: can handle only ACK", __func__));
+    *lsop = NULL;

    sc = syncache_lookup(inc, &sch);    /* returns locked sch */
    SCH_LOCK_ASSERT(sch);
    if (sc == NULL) {
        /*
         * There is no syncache entry, so see if this ACK is
-         * a returning syncookie.  To do this, first:
-         *  A. See if this socket has had a syncache entry dropped in
-         *     the past.  We don't want to accept a bogus syncookie
-         *     if we've never received a SYN.
-         *  B. check that the syncookie is valid.  If it is, then
-         *     cobble up a fake syncache entry, and return.
+         * a returning syncookie.  If the syncookie is valid,
+         * cobble up a fake syncache entry and create a socket.
+         *
+         * NB: Syncache head is locked for the syncookie access.
         */
        if (!tcp_syncookies) {
-            SCH_UNLOCK(sch);
            if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
                log(LOG_DEBUG, "%s; %s: Spurious ACK, "
                    "segment rejected (syncookies disabled)\n",
                    s, __func__);
-            goto failed;
+            goto sendrst;
        }
        bzero(&scs, sizeof(scs));
        sc = syncookie_lookup(inc, sch, &scs, to, th, *lsop);
-        SCH_UNLOCK(sch);
        if (sc == NULL) {
            if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
                log(LOG_DEBUG, "%s; %s: Segment failed "
                    "SYNCOOKIE authentication, segment rejected "
                    "(probably spoofed)\n", s, __func__);
-            goto failed;
+            goto sendrst;
        }
-    } else {
-        /* Pull out the entry to unlock the bucket row. */
-        TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
-        sch->sch_length--;
-        V_tcp_syncache.cache_count--;
-        SCH_UNLOCK(sch);
+        goto expand;        /* fully validated through syncookie */
    }
+    SCH_LOCK_ASSERT(sch);

Why do you need this assert?

Removed.  Was from an earlier iteration with different locking and gotos.

    /*
     * Segment validation:
-     * ACK must match our initial sequence number + 1 (the SYN|ACK).
-     */
-    if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
-        if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
-            log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
-                "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
-        goto failed;
-    }
-
-    /*
+     *
     * The SEQ must fall in the window starting at the received
     * initial receive sequence number + 1 (the SYN).
+     * If not the segment may be from an earlier connection.  We send
+     * an ACK to re-synchronize the connection and keep the syncache
+     * entry without ajusting its timers.
+     * See RFC793 page 69, first check sequence number [SYN_RECEIVED].
     */
    if ((SEQ_LEQ(th->th_seq, sc->sc_irs) ||
        SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) &&
@@ -877,14 +865,41 @@ syncache_expand(struct in_conninfo *inc,
        if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
            log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
                "rejected\n", s, __func__, th->th_seq, sc->sc_irs);
-        goto failed;
+        (void) syncache_respond(sc);
+        *lsop = 1;            /* prevent RST */
+        goto sendrstkeep;
    }

+    /*
+     * ACK must match our initial sequence number + 1 (the SYN|ACK).
+     * If not the segment may be from an earlier connection.  We send
+     * a RST but keep the syncache entry without ajusting its timers.
+     * See RFC793 page 72, fifth check the ACK field, [SYN_RECEIVED].
+     */
+    if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
+        if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+            log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
+                "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
+        goto sendrstkeep;
+    }
+
+    /*
+     * Remove the entry to unlock the bucket row.
+     * Tests from now on are fatal and remove the syncache entry.
+     */
+    TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
+    sch->sch_length--;
+    V_tcp_syncache.cache_count--;
+
+    /*
+     * If timestamps were not negotiated they must not show up later.
+     * See RFC1312bis, section 1.3, second paragraph
+     */
    if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) {
        if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
            log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
                "segment rejected\n", s, __func__);
-        goto failed;
+        goto sendrst;
    }
    /*
     * If timestamps were negotiated the reflected timestamp
@@ -896,9 +911,11 @@ syncache_expand(struct in_conninfo *inc,
            log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
                "segment rejected\n",
                s, __func__, to->to_tsecr, sc->sc_ts);
-        goto failed;
+        goto sendrst;
    }

+expand:
+    SCH_UNLOCK(sch);
    *lsop = syncache_socket(sc, *lsop, m);

    if (*lsop == NULL)
@@ -906,16 +923,18 @@ syncache_expand(struct in_conninfo *inc,
    else
        V_tcpstat.tcps_sc_completed++;

-/* how do we find the inp for the new socket? */
    if (sc != &scs)
        syncache_free(sc);
    return (1);
-failed:
-    if (sc != NULL && sc != &scs)
+
+sendrst:
+    if (sc != &scs)
        syncache_free(sc);
+sendrstkeep:
+    SCH_LOCK_ASSERT(sch);
+    SCH_UNLOCK(sch);

Why do we need an assert before an unlock?

This is customary in at least the TCP code.  A failed assert provides more
and better diagnostics than a crash-and-burn failed unlock.

    if (s != NULL)
        free(s, M_TCPLOG);
-    *lsop = NULL;
    return (0);
}


This was probably out of scope:

yep.  Put it in while reading the code.

@@ -1322,6 +1341,8 @@ syncache_respond(struct syncache *sc)
         *
         *    1) path_mtu_discovery is disabled
         *    2) the SCF_UNREACH flag has been set
+         *
+         * XXXAO: The route lookup comment doesn't make sense.
         */
        if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
               ip->ip_off |= IP_DF;



I won't be able to test this any time soon, so I can't really comment on the rest, but it *looks* okay.

--
Andre

_______________________________________________
freebsd-net@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "[EMAIL PROTECTED]"

Reply via email to