Add an ioctl, SIOCABORTCONN, for aborting established TCP connections.
It is intended as a high-availability (HA) performance improvement for
cleanup, failure notification, and application termination.

Signed-off-by:  David Griego <[EMAIL PROTECTED]>
---

 include/linux/ipv6.h          |    8 ++++
 include/linux/socket.h        |    5 ++
 include/linux/sockios.h       |    1 
 include/net/inet_hashtables.h |    6 +++
 net/ipv4/tcp.c                |   15 +++++++
 net/ipv4/tcp_ipv4.c           |   86 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 121 insertions(+), 0 deletions(-)
---

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index f824113..42f6765 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -467,6 +467,14 @@ #define INET6_MATCH(__sk, __hash, __sadd
         ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
         (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 
+#define TCP_IPV6_WILDCARD_MATCH(__sk, __saddr, __sport, __daddr, __dport) \
+       ((ipv6_addr_any(&(__saddr))                                     || \
+         ipv6_addr_equal(&(__saddr), &inet6_sk(__sk)->saddr))          && \
+        (!(__sport) || ntohs(__sport) == inet_sk(__sk)->num)           && \
+        (ipv6_addr_any(&(__daddr))                                     || \
+         ipv6_addr_equal(&(__daddr), &inet6_sk(__sk)->daddr))          && \
+        (!(__dport) || (__dport) == inet_sk(__sk)->dport))
+
 #endif /* __KERNEL__ */
 
 #endif /* _IPV6_H */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index fcd35a2..0bf7b0a 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -48,6 +48,11 @@ struct linger {
 
 #define sockaddr_storage __kernel_sockaddr_storage
 
+struct tcp_abort_sockaddr_storage {     /* Argument of SIOCABORTCONN */
+       struct sockaddr_storage local;  /* Local address for lookup */
+       struct sockaddr_storage remote; /* Remote address for lookup */
+};
+
 /*
  *     As we do 4.4BSD message passing we use a 4.4BSD message passing
  *     system, not 4.3. Thus msg_accrights(len) are now missing. They
diff --git a/include/linux/sockios.h b/include/linux/sockios.h
index abef759..b850577 100644
--- a/include/linux/sockios.h
+++ b/include/linux/sockios.h
@@ -140,4 +140,5 @@ #define SIOCDEVPRIVATE      0x89F0  /* to 89FF
  */
  
 #define SIOCPROTOPRIVATE 0x89E0 /* to 89EF */
+#define SIOCABORTCONN (SIOCPROTOPRIVATE + 1) /* abort TCP connections */
 #endif /* _LINUX_SOCKIOS_H */
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index d27ee8c..735739a 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -339,6 +339,12 @@ #define INET_TW_MATCH(__sk, __hash,__coo
         (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #endif /* 64-bit arch */
 
+#define TCP_IPV4_WILDCARD_MATCH(__sk, __saddr, __sport, __daddr, __dport) \
+       (((__saddr) == INADDR_ANY || (__saddr) == inet_sk(__sk)->saddr) && \
+        (!(__sport) || ntohs(__sport) == inet_sk(__sk)->num)           && \
+        ((__daddr) == INADDR_ANY || (__daddr) == inet_sk(__sk)->daddr) && \
+        (!(__dport) || (__dport) == inet_sk(__sk)->dport))
+
 /*
  * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
  * not check it for lookups anymore, thanks Alexey. -DaveM
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3834b10..b3e7a6f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -442,6 +442,21 @@ int tcp_ioctl(struct sock *sk, int cmd, 
                else
                        answ = tp->write_seq - tp->snd_una;
                break;
+       case SIOCABORTCONN:
+       {
+               struct tcp_abort_sockaddr_storage tcp_abort;
+               void __user *data = (void __user *)arg;
+
+               /* Aborting connections requires CAP_NET_ADMIN */
+               if (!capable(CAP_NET_ADMIN))
+                       return -EACCES;
+
+               /* Fetch the local/remote address pair from user space */
+               if (copy_from_user(&tcp_abort, data, sizeof(tcp_abort)))
+                       return -EFAULT;
+               return tcp_handle_abort_req(&tcp_abort.local,
+                               &tcp_abort.remote);
+       }
        default:
                return -ENOIOCTLCMD;
        };
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0ba74bb..add483b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -123,6 +123,91 @@ void tcp_unhash(struct sock *sk)
        inet_unhash(&tcp_hashinfo, sk);
 }
 
+/*
+ * Return non-zero when @sk matches the requested local/remote addresses.
+ * A zero address or port in the request matches any value.
+ */
+static int tcp_abort_match(struct sock *sk, struct sockaddr_storage *local,
+               struct sockaddr_storage *remote)
+{
+       if (sk->sk_family != local->ss_family)
+               return 0;
+
+       if (local->ss_family == AF_INET) {
+               struct sockaddr_in *l4 = (struct sockaddr_in *)local;
+               struct sockaddr_in *r4 = (struct sockaddr_in *)remote;
+
+               return TCP_IPV4_WILDCARD_MATCH(sk,
+                               l4->sin_addr.s_addr, l4->sin_port,
+                               r4->sin_addr.s_addr, r4->sin_port);
+       }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+       if (local->ss_family == AF_INET6) {
+               struct sockaddr_in6 *l6 = (struct sockaddr_in6 *)local;
+               struct sockaddr_in6 *r6 = (struct sockaddr_in6 *)remote;
+
+               return TCP_IPV6_WILDCARD_MATCH(sk,
+                               l6->sin6_addr, l6->sin6_port,
+                               r6->sin6_addr, r6->sin6_port);
+       }
+#endif /* CONFIG_IPV6 || CONFIG_IPV6_MODULE: built in or modular */
+       return 0;       /* Not a supported address family */
+}
+
+/*
+ * Abort every established TCP connection matching @local and @remote.
+ * Both must use the same family (AF_INET or AF_INET6), else -EINVAL.
+ * Returns 0 if at least one connection was aborted, -ENOENT otherwise.
+ */
+int tcp_handle_abort_req(struct sockaddr_storage *local,
+               struct sockaddr_storage *remote)
+{
+       int i, ret = -ENOENT;
+
+       /* Check for supported address families */
+       if ((local->ss_family != remote->ss_family) ||
+           ((local->ss_family != AF_INET) && (local->ss_family != AF_INET6)))
+               return -EINVAL;
+
+       local_bh_disable();
+
+       /* Cycle through all the established connection buckets */
+       for (i = 0; i < tcp_hashinfo.ehash_size; ++i) {
+               struct sock *sk;
+               struct hlist_node *node;
+restart_bucket:
+               read_lock(&tcp_hashinfo.ehash[i].lock);
+               sk_for_each(sk, node, &tcp_hashinfo.ehash[i].chain) {
+                       if (!tcp_abort_match(sk, local, remote))
+                               continue;
+
+                       /* Found a match so kill it */
+                       ret = 0;
+                       sock_hold(sk);
+                       read_unlock(&tcp_hashinfo.ehash[i].lock);
+                       local_bh_enable();
+
+                       /* tcp_disconnect() must run under the socket lock */
+                       lock_sock(sk);
+                       tcp_disconnect(sk, O_NONBLOCK);
+                       release_sock(sk);
+                       local_bh_disable();
+                       sock_put(sk);
+
+                       /* The bucket lock was dropped while disconnecting,
+                        * so rescan this hash bucket from the beginning
+                        */
+                       goto restart_bucket;
+               }
+
+               read_unlock(&tcp_hashinfo.ehash[i].lock);
+       }
+
+       local_bh_enable();
+
+       return ret;
+}
+
 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
 {
        return secure_tcp_sequence_number(skb->nh.iph->daddr,
@@ -2462,6 +2547,7 @@ EXPORT_SYMBOL(ipv4_specific);
 EXPORT_SYMBOL(tcp_hashinfo);
 EXPORT_SYMBOL(tcp_prot);
 EXPORT_SYMBOL(tcp_unhash);
+EXPORT_SYMBOL(tcp_handle_abort_req);
 EXPORT_SYMBOL(tcp_v4_conn_request);
 EXPORT_SYMBOL(tcp_v4_connect);
 EXPORT_SYMBOL(tcp_v4_do_rcv);
-- 
1.4.1

-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to