On Mon, 2015-12-21 at 17:20 +0100, Florian Westphal wrote: > Hannes points out that when we generate tcp reset for timewait sockets we > pretend we found no socket and pass NULL sk to tcp_vX_send_reset(). > > Make it cope with inet tw sockets and then provide tw sk so RST appears on > correct interface. > > Packetdrill test case: > // want default route to be used, we rely on BINDTODEVICE > `ip route del 192.0.2.0/24 via 192.168.0.2 dev tun0` > > 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 > 0.001 setsockopt(3, SOL_SOCKET, SO_BINDTODEVICE, "tun0", 4) = 0 > 0.100...0.200 connect(3, ..., ...) = 0 > > 0.100 > S 0:0(0) <mss 1460,sackOK,nop,nop> > 0.200 < S. 0:0(0) ack 1 win 32792 <mss 1460,sackOK,nop,nop> > 0.200 > . 1:1(0) ack 1 > > 0.210 close(3) = 0 > > 0.210 > F. 1:1(0) ack 1 win 29200 > 0.300 < . 1:1(0) ack 2 win 46 > > // more data while in FIN_WAIT2, expect RST > 1.300 < P. 1:1001(1000) ack 1 win 46 > > // fails without this change -- default route is used > 1.301 > R 1:1(0) win 0 > > Reported-by: Hannes Frederic Sowa <han...@stressinduktion.org> > Signed-off-by: Florian Westphal <f...@strlen.de> > --- > net/ipv4/tcp_ipv4.c | 31 ++++++++++++++++++++++--------- > net/ipv4/tcp_minisocks.c | 7 ++----- > net/ipv6/tcp_ipv6.c | 15 +++++++++++---- > 3 files changed, 35 insertions(+), 18 deletions(-) > > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c > index 46e92fb..24ba2e1 100644 > --- a/net/ipv4/tcp_ipv4.c > +++ b/net/ipv4/tcp_ipv4.c > @@ -587,13 +587,14 @@ static void tcp_v4_send_reset(const struct sock *sk, > struct sk_buff *skb) > } rep; > struct ip_reply_arg arg; > #ifdef CONFIG_TCP_MD5SIG > - struct tcp_md5sig_key *key; > + struct tcp_md5sig_key *key = NULL; > const __u8 *hash_location = NULL; > unsigned char newhash[16]; > int genhash; > struct sock *sk1 = NULL; > #endif > struct net *net; > + bool have_full_sk; > > /* Never send a reset in response to a reset. 
*/ > if (th->rst) > @@ -624,10 +625,14 @@ static void tcp_v4_send_reset(const struct sock *sk, > struct sk_buff *skb) > arg.iov[0].iov_base = (unsigned char *)&rep; > arg.iov[0].iov_len = sizeof(rep.th); > > - net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); > + have_full_sk = sk && sk_fullsock(sk); > + net = have_full_sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
But the net pointer can be derived from a timewait socket in the same way, so I am not sure why you changed this part ... This makes your patch look more complicated than needed. > #ifdef CONFIG_TCP_MD5SIG > hash_location = tcp_parse_md5sig_option(th); > - if (!sk && hash_location) { > + if (have_full_sk) { > + key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *) > + &ip_hdr(skb)->saddr, AF_INET); > + } else if (hash_location) { > /* > * active side is lost. Try to find listening socket through > * source port, and then find md5 key through listening socket. > @@ -651,10 +656,6 @@ static void tcp_v4_send_reset(const struct sock *sk, > struct sk_buff *skb) > genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); > if (genhash || memcmp(hash_location, newhash, 16) != 0) > goto release_sk1; > - } else { > - key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *) > - &ip_hdr(skb)->saddr, > - AF_INET) : NULL; > } > > if (key) { > @@ -675,7 +676,14 @@ static void tcp_v4_send_reset(const struct sock *sk, > struct sk_buff *skb) > ip_hdr(skb)->saddr, /* XXX */ > arg.iov[0].iov_len, IPPROTO_TCP, 0); > arg.csumoffset = offsetof(struct tcphdr, check) / 2; > - arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK > : 0; > + arg.flags = 0; > + if (have_full_sk) { > + if (inet_sk(sk)->transparent) > + arg.flags = IP_REPLY_ARG_NOSRCCHECK; > + } else if (sk && inet_twsk(sk)->tw_transparent) { > + arg.flags = IP_REPLY_ARG_NOSRCCHECK; > + } > + Maybe a helper to retrieve the transparent status from a generic socket (being full, timewait or request sock) would help. This could be submitted as a separate patch to ease review. > /* When socket is gone, all binding information is lost. > * routing might fail in this case. No choice here, if we choose to > force > * input interface, we will misroute in case of asymmetric route. 
> @@ -683,6 +691,9 @@ static void tcp_v4_send_reset(const struct sock *sk, > struct sk_buff *skb) > if (sk) > arg.bound_dev_if = sk->sk_bound_dev_if; > > + BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != > + offsetof(struct inet_timewait_sock, tw_bound_dev_if)); > + > arg.tos = ip_hdr(skb)->tos; > ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), > skb, &TCP_SKB_CB(skb)->header.h4.opt, > @@ -1706,7 +1717,9 @@ do_time_wait: > tcp_v4_timewait_ack(sk, skb); > break; > case TCP_TW_RST: > - goto no_tcp_socket; > + tcp_v4_send_reset(sk, skb); > + inet_twsk_deschedule_put(inet_twsk(sk)); > + goto discard_it; > case TCP_TW_SUCCESS:; > } > goto discard_it; > diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c > index ac6b196..75632a9 100644 > --- a/net/ipv4/tcp_minisocks.c > +++ b/net/ipv4/tcp_minisocks.c > @@ -131,7 +131,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, > struct sk_buff *skb, > goto kill; > > if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt)) > - goto kill_with_rst; > + return TCP_TW_RST; > > /* Dup ACK? */ > if (!th->ack || > @@ -145,11 +145,8 @@ tcp_timewait_state_process(struct inet_timewait_sock > *tw, struct sk_buff *skb, > * reset. > */ > if (!th->fin || > - TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { > -kill_with_rst: > - inet_twsk_deschedule_put(tw); > + TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) > return TCP_TW_RST; > - } > > /* FIN arrived, enter true time-wait state. 
*/ > tw->tw_substate = TCP_TIME_WAIT; > diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c > index f03d2b0..2637b61 100644 > --- a/net/ipv6/tcp_ipv6.c > +++ b/net/ipv6/tcp_ipv6.c > @@ -841,6 +841,7 @@ static void tcp_v6_send_reset(const struct sock *sk, > struct sk_buff *skb) > int genhash; > struct sock *sk1 = NULL; > #endif > + bool have_full_sk; > int oif; > > if (th->rst) > @@ -852,9 +853,12 @@ static void tcp_v6_send_reset(const struct sock *sk, > struct sk_buff *skb) > if (!sk && !ipv6_unicast_destination(skb)) > return; > > + have_full_sk = sk && sk_fullsock(sk); > #ifdef CONFIG_TCP_MD5SIG > hash_location = tcp_parse_md5sig_option(th); > - if (!sk && hash_location) { > + if (have_full_sk) { > + key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr); > + } else if (hash_location) { > /* > * active side is lost. Try to find listening socket through > * source port, and then find md5 key through listening socket. > @@ -877,8 +881,6 @@ static void tcp_v6_send_reset(const struct sock *sk, > struct sk_buff *skb) > genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); > if (genhash || memcmp(hash_location, newhash, 16) != 0) > goto release_sk1; > - } else { > - key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL; > } > #endif > > @@ -889,6 +891,9 @@ static void tcp_v6_send_reset(const struct sock *sk, > struct sk_buff *skb) > (th->doff << 2); > > oif = sk ? sk->sk_bound_dev_if : 0; > + if (!have_full_sk) > + sk = NULL; > + I have no idea why you need to set sk to NULL here. This does not seem related to this patch. I found this hard to review... It seems you have multiple logical changes? Splitting into at least 2 patches would be nice. 
> tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); > > #ifdef CONFIG_TCP_MD5SIG > @@ -1516,7 +1521,9 @@ do_time_wait: > break; > case TCP_TW_RST: > tcp_v6_restore_cb(skb); > - goto no_tcp_socket; > + tcp_v6_send_reset(sk, skb); > + inet_twsk_deschedule_put(inet_twsk(sk)); > + goto discard_it; > case TCP_TW_SUCCESS: > ; > } -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html