This patch adds an ioctl, SIOCABORTCONN, for aborting established TCP connections. It is intended as a high-availability (HA) performance improvement for connection cleanup, failure notification, and application termination.
Signed-off-by: David Griego <[EMAIL PROTECTED]> --- include/linux/ipv6.h | 8 ++++ include/linux/socket.h | 5 ++ include/linux/sockios.h | 1 include/net/inet_hashtables.h | 6 +++ net/ipv4/tcp.c | 15 +++++++ net/ipv4/tcp_ipv4.c | 86 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 121 insertions(+), 0 deletions(-) --- diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index f824113..42f6765 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -467,6 +467,14 @@ #define INET6_MATCH(__sk, __hash, __sadd ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#define TCP_IPV6_WILDCARD_MATCH(__sk, __saddr, __sport,__daddr, __dport) \ + ((ipv6_addr_any(&__saddr) || \ + ipv6_addr_equal(&__saddr, &inet6_sk(__sk)->saddr)) && \ + ((!__sport) || (ntohs(__sport) == inet_sk(__sk)->num)) && \ + (ipv6_addr_any(&__daddr) || \ + ipv6_addr_equal(&__daddr, &inet6_sk(__sk)->daddr)) && \ + ((!__dport) || (__dport == inet_sk(__sk)->dport))) + #endif /* __KERNEL__ */ #endif /* _IPV6_H */ diff --git a/include/linux/socket.h b/include/linux/socket.h index fcd35a2..0bf7b0a 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -48,6 +48,11 @@ struct linger { #define sockaddr_storage __kernel_sockaddr_storage +struct tcp_abort_sockaddr_storage { + struct sockaddr_storage local; /* local address for lookup */ + struct sockaddr_storage remote; /* Remote address for lookup */ +}; + /* * As we do 4.4BSD message passing we use a 4.4BSD message passing * system, not 4.3. Thus msg_accrights(len) are now missing. 
They diff --git a/include/linux/sockios.h b/include/linux/sockios.h index abef759..b850577 100644 --- a/include/linux/sockios.h +++ b/include/linux/sockios.h @@ -140,4 +140,5 @@ #define SIOCDEVPRIVATE 0x89F0 /* to 89FF */ #define SIOCPROTOPRIVATE 0x89E0 /* to 89EF */ +#define SIOCABORTCONN SIOCPROTOPRIVATE + 1 #endif /* _LINUX_SOCKIOS_H */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index d27ee8c..735739a 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -339,6 +339,12 @@ #define INET_TW_MATCH(__sk, __hash,__coo (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #endif /* 64-bit arch */ +#define TCP_IPV4_WILDCARD_MATCH(__sk, __saddr, __sport,__daddr, __dport) \ + (((__saddr == INADDR_ANY) || (__saddr == inet_sk(__sk)->saddr)) && \ + ((!__sport) || (ntohs(__sport) == inet_sk(__sk)->num)) && \ + ((__daddr == INADDR_ANY) || (__daddr == inet_sk(__sk)->daddr)) && \ + ((!__dport) || (__dport == inet_sk(__sk)->dport))) + /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need * not check it for lookups anymore, thanks Alexey. 
-DaveM diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3834b10..b3e7a6f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -442,6 +442,21 @@ int tcp_ioctl(struct sock *sk, int cmd, else answ = tp->write_seq - tp->snd_una; break; + case SIOCABORTCONN: + { + struct tcp_abort_sockaddr_storage tcp_abort; + void __user *data = (void __user *)arg; + + if (!capable(CAP_NET_ADMIN)){ + return -EACCES; + } + + /* Check permissions */ + if (copy_from_user(&tcp_abort, data, sizeof(tcp_abort))) { + return -EFAULT; + } + return tcp_handle_abort_req(&tcp_abort.local, &tcp_abort.remote); + } default: return -ENOIOCTLCMD; }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0ba74bb..add483b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -123,6 +123,91 @@ void tcp_unhash(struct sock *sk) inet_unhash(&tcp_hashinfo, sk); } +int tcp_handle_abort_req(struct sockaddr_storage *local, + struct sockaddr_storage *remote) +{ + int i, ret = -ENOENT; + + /* Check for supported address families */ + if((local->ss_family != remote->ss_family) || + ((local->ss_family != AF_INET) && (local->ss_family != AF_INET6))) + return -EINVAL; + + local_bh_disable(); + + /* cycle through all the established connecton buckets */ + for (i = 0; i < tcp_hashinfo.ehash_size; ++i) { + struct sock *sk; + struct hlist_node *node; +restart_bucket: + + read_lock(&tcp_hashinfo.ehash[i].lock); + sk_for_each(sk, node, &tcp_hashinfo.ehash[i].chain) { + if (sk->sk_family != local->ss_family) { + continue; + } + + switch(local->ss_family) { + case AF_INET: + { + struct sockaddr_in *ipv4_local = + (struct sockaddr_in *)local; + struct sockaddr_in *ipv4_remote = + (struct sockaddr_in *)remote; + + if(TCP_IPV4_WILDCARD_MATCH(sk, + ipv4_local->sin_addr.s_addr, + ipv4_local->sin_port, + ipv4_remote->sin_addr.s_addr, + ipv4_remote->sin_port)) + break; + continue; + } +#ifdef CONFIG_IPV6 + case AF_INET6: + { + struct sockaddr_in6 *ipv6_local = + (struct sockaddr_in6 *)local; + struct sockaddr_in6 
*ipv6_remote = + (struct sockaddr_in6 *)remote; + + if(TCP_IPV6_WILDCARD_MATCH(sk, + ipv6_local->sin6_addr, + ipv6_local->sin6_port, + ipv6_remote->sin6_addr, + ipv6_remote->sin6_port)) + break; + continue; + } +#endif /*CONFIG_IPV6 */ + default: /* Not a supported address family */ + continue; + } + + /* Found a match so kill it */ + ret = 0; + sock_hold(sk); + read_unlock(&tcp_hashinfo.ehash[i].lock); + local_bh_enable(); + tcp_disconnect(sk, O_NONBLOCK); + local_bh_disable(); + sock_put(sk); + + /* Broke link and let go of the list lock, so restart + * our search for matches at the begining of this + * hash bucket + */ + goto restart_bucket; + } + + read_unlock(&tcp_hashinfo.ehash[i].lock); + } + + local_bh_enable(); + + return ret; +} + static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) { return secure_tcp_sequence_number(skb->nh.iph->daddr, @@ -2462,6 +2547,7 @@ EXPORT_SYMBOL(ipv4_specific); EXPORT_SYMBOL(tcp_hashinfo); EXPORT_SYMBOL(tcp_prot); EXPORT_SYMBOL(tcp_unhash); +EXPORT_SYMBOL(tcp_handle_abort_req); EXPORT_SYMBOL(tcp_v4_conn_request); EXPORT_SYMBOL(tcp_v4_connect); EXPORT_SYMBOL(tcp_v4_do_rcv); -- 1.4.1 - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html