On Tue, Feb 21, 2006 at 12:53:28PM +0100, Beschorner Daniel wrote: > Here is a log since the last ipsec start, do you need more? > "koeln-os" is the affected connection, system seems ok.
Thanks. This and the packet dump confirm that it is the TOS bug I mentioned earlier. When we get an ICMP payload whose TOS is non-zero we will not update any existing route cache entries whose TOS is zero. Since IPsec always uses TOS == 0 to look up route cache entries, the entry it uses never gets updated. Here is a patch that you can try. It's not perfect since it may extend a single bucket to as many as 16 entries if someone tries to attack you with different TOS values. However, it should solve your specific issue. For the list, here is an excerpt from the packet dump: 22:27:00.335359 x.x.x.x > y.y.y.y: ESP(spi=0x95b26645,seq=0x144) (DF) [tos 0x8] 22:27:00.366881 z.z.z.z > x.x.x.x: icmp: y.y.y.y unreachable - need to frag (mtu 1492) 22:27:00.533628 x.x.x.x > y.y.y.y: ESP(spi=0x95b26645,seq=0x145) (DF) [tos 0x8] 22:27:00.578825 z.z.z.z > x.x.x.x: icmp: y.y.y.y unreachable - need to frag (mtu 1492) Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 <[EMAIL PROTECTED]> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- On 2/15/06, Herbert Xu <[EMAIL PROTECTED]> wrote: > On Wed, Feb 15, 2006 at 03:21:50PM +0200, Ilia Sotnikov wrote: > > > > Totally agree but perhaps we should ask for confirmation from someone? > > That's what this list is for :) Send a patch and if there are no objections > it should go in. Here it is, against 2.6.16-rc3. Signed-off-by: Ilia Sotnikov <[EMAIL PROTECTED]> -- Ilia Sotnikov --- linux-2.6.16-rc3.orig/net/ipv4/route.c 2006-02-16 21:37:05.000000000 +0200 +++ linux-2.6.16-rc3/net/ipv4/route.c 2006-02-16 21:59:26.000000000 +0200 @@ -14,6 +14,8 @@ * Alexey Kuznetsov, <[EMAIL PROTECTED]> * * Fixes: + * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect + * Ilia Sotnikov : Removed TOS from hash calculations * Alan Cox : Verify area fixes. 
* Alan Cox : cli() protects routing changes * Rui Oliveira : ICMP routing table updates @@ -247,9 +249,9 @@ static int rt_intern_hash(unsigned hash, struct rtable *rth, struct rtable **res); -static unsigned int rt_hash_code(u32 daddr, u32 saddr, u8 tos) +static unsigned int rt_hash_code(u32 daddr, u32 saddr) { - return (jhash_3words(daddr, saddr, (u32) tos, rt_hash_rnd) + return (jhash_2words(daddr, saddr, rt_hash_rnd) & rt_hash_mask); } @@ -1141,8 +1143,7 @@ for (i = 0; i < 2; i++) { for (k = 0; k < 2; k++) { unsigned hash = rt_hash_code(daddr, - skeys[i] ^ (ikeys[k] << 5), - tos); + skeys[i] ^ (ikeys[k] << 5)); rthp=&rt_hash_table[hash].chain; @@ -1152,7 +1153,6 @@ if (rth->fl.fl4_dst != daddr || rth->fl.fl4_src != skeys[i] || - rth->fl.fl4_tos != tos || rth->fl.oif != ikeys[k] || rth->fl.iif != 0) { rthp = &rth->u.rt_next; @@ -1253,8 +1253,7 @@ rt->u.dst.expires) { unsigned hash = rt_hash_code(rt->fl.fl4_dst, rt->fl.fl4_src ^ - (rt->fl.oif << 5), - rt->fl.fl4_tos); + (rt->fl.oif << 5)); #if RT_CACHE_DEBUG >= 1 printk(KERN_DEBUG "ip_rt_advice: redirect to " "%u.%u.%u.%u/%02x dropped\n", @@ -1391,14 +1390,13 @@ struct rtable *rth; u32 skeys[2] = { iph->saddr, 0, }; u32 daddr = iph->daddr; - u8 tos = iph->tos & IPTOS_RT_MASK; unsigned short est_mtu = 0; if (ipv4_config.no_pmtu_disc) return 0; for (i = 0; i < 2; i++) { - unsigned hash = rt_hash_code(daddr, skeys[i], tos); + unsigned hash = rt_hash_code(daddr, skeys[i]); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -1407,7 +1405,6 @@ rth->fl.fl4_src == skeys[i] && rth->rt_dst == daddr && rth->rt_src == iph->saddr && - rth->fl.fl4_tos == tos && rth->fl.iif == 0 && !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) { unsigned short mtu = new_mtu; @@ -1658,7 +1655,7 @@ RT_CACHE_STAT_INC(in_slow_mc); in_dev_put(in_dev); - hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos); + hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5)); return rt_intern_hash(hash, rth, (struct rtable**) 
&skb->dst); e_nobufs: @@ -1823,7 +1820,7 @@ return err; /* put it into the cache */ - hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos); + hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5)); return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); } @@ -1864,7 +1861,7 @@ return err; /* put it into the cache */ - hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos); + hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5)); err = rt_intern_hash(hash, rth, &rtres); if (err) return err; @@ -2041,7 +2038,7 @@ rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; - hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5), tos); + hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5)); err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); goto done; @@ -2088,7 +2085,7 @@ int iif = dev->ifindex; tos &= IPTOS_RT_MASK; - hash = rt_hash_code(daddr, saddr ^ (iif << 5), tos); + hash = rt_hash_code(daddr, saddr ^ (iif << 5)); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -2286,10 +2283,8 @@ int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); unsigned hash; if (err == 0) { - u32 tos = RT_FL_TOS(oldflp); - hash = rt_hash_code(oldflp->fl4_dst, - oldflp->fl4_src ^ (oldflp->oif << 5), tos); + oldflp->fl4_src ^ (oldflp->oif << 5)); err = rt_intern_hash(hash, rth, rp); } @@ -2334,7 +2329,7 @@ hash = rt_hash_code(oldflp->fl4_dst, oldflp->fl4_src ^ - (oldflp->oif << 5), tos); + (oldflp->oif << 5)); err = rt_intern_hash(hash, rth, rp); /* forward hop information to multipath impl. 
*/ @@ -2563,7 +2558,7 @@ unsigned hash; struct rtable *rth; - hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5), flp->fl4_tos); + hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5)); rcu_read_lock_bh(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html