A more complete version of local port range checking. 1. Enforce that low < high when the range is set. 2. Use a seqlock so that readers always see a consistent (low, high) pair while the range is being updated. 3. Add port randomization to SCTP. This is a new feature, but it is easier than maintaining the old code, which was broken if the range changed.
Signed-off-by: Stephen Hemminger <[EMAIL PROTECTED]> --- drivers/infiniband/core/cma.c | 24 ++++++------ include/net/ip.h | 3 + net/ipv4/inet_connection_sock.c | 26 ++++++++++--- net/ipv4/inet_hashtables.c | 13 +++--- net/ipv4/sysctl_net_ipv4.c | 77 ++++++++++++++++++++++++++++++++++++---- net/ipv4/tcp_ipv4.c | 1 net/ipv4/udp.c | 18 ++++----- net/ipv6/inet6_hashtables.c | 13 +++--- net/sctp/protocol.c | 1 net/sctp/socket.c | 26 ++++--------- security/selinux/hooks.c | 37 ++++++++++--------- 11 files changed, 157 insertions(+), 82 deletions(-) --- a/include/net/ip.h 2007-10-10 08:26:57.000000000 -0700 +++ b/include/net/ip.h 2007-10-10 09:35:26.000000000 -0700 @@ -171,7 +171,8 @@ extern unsigned long snmp_fold_field(voi extern int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign); extern void snmp_mib_free(void *ptr[2]); -extern int sysctl_local_port_range[2]; +extern void inet_get_local_port_range(int range[2]); + extern int sysctl_ip_default_ttl; extern int sysctl_ip_nonlocal_bind; --- a/net/ipv4/inet_connection_sock.c 2007-10-10 09:29:03.000000000 -0700 +++ b/net/ipv4/inet_connection_sock.c 2007-10-10 09:52:49.000000000 -0700 @@ -33,6 +33,19 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); * This array holds the first and last local port number. 
*/ int sysctl_local_port_range[2] = { 32768, 61000 }; +DEFINE_SEQLOCK(sysctl_port_range_lock); + +void inet_get_local_port_range(int range[2]) +{ + unsigned seq; + do { + seq = read_seqbegin(&sysctl_port_range_lock); + + range[0] = sysctl_local_port_range[0]; + range[1] = sysctl_local_port_range[1]; + } while (read_seqretry(&sysctl_port_range_lock, seq)); +} +EXPORT_SYMBOL(inet_get_local_port_range); int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) @@ -77,10 +90,11 @@ int inet_csk_get_port(struct inet_hashin local_bh_disable(); if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover = net_random() % (high - low) + low; + int remaining, range[2], rover; + + inet_get_local_port_range(range); + remaining = range[1] - range[0]; + rover = net_random() % (range[1] - range[0]) + range[0]; do { head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; @@ -91,8 +105,8 @@ int inet_csk_get_port(struct inet_hashin break; next: spin_unlock(&head->lock); - if (++rover > high) - rover = low; + if (++rover > range[1]) + rover = range[0]; } while (--remaining > 0); /* Exhausted local port range during search? 
It is not --- a/net/ipv4/inet_hashtables.c 2007-10-10 09:27:02.000000000 -0700 +++ b/net/ipv4/inet_hashtables.c 2007-10-10 09:40:39.000000000 -0700 @@ -279,19 +279,18 @@ int inet_hash_connect(struct inet_timewa int ret; if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int range = high - low; - int i; - int port; + int i, count, range[2], port; static u32 hint; u32 offset = hint + inet_sk_port_offset(sk); struct hlist_node *node; struct inet_timewait_sock *tw = NULL; + inet_get_local_port_range(range); + count = range[1] - range[0]; + local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; + for (i = 1; i <= count; i++) { + port = range[0] + (i + offset) % count; head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; spin_lock(&head->lock); --- a/net/ipv4/sysctl_net_ipv4.c 2007-10-10 08:27:00.000000000 -0700 +++ b/net/ipv4/sysctl_net_ipv4.c 2007-10-10 09:46:12.000000000 -0700 @@ -12,6 +12,7 @@ #include <linux/sysctl.h> #include <linux/igmp.h> #include <linux/inetdevice.h> +#include <linux/seqlock.h> #include <net/snmp.h> #include <net/icmp.h> #include <net/ip.h> @@ -25,8 +26,6 @@ extern int sysctl_ip_nonlocal_bind; #ifdef CONFIG_SYSCTL static int zero; static int tcp_retr1_max = 255; -static int ip_local_port_range_min[] = { 1, 1 }; -static int ip_local_port_range_max[] = { 65535, 65535 }; #endif struct ipv4_config ipv4_config; @@ -89,6 +88,74 @@ static int ipv4_sysctl_forward_strategy( return 1; } +extern seqlock_t sysctl_port_range_lock; +extern int sysctl_local_port_range[2]; + +static int local_min_port[2] = { 1, 1 }; +static int local_max_port[2] = { 65535, 65535 }; + +static void set_local_port_range(const int range[2]) +{ + write_seqlock(&sysctl_port_range_lock); + sysctl_local_port_range[0] = range[0]; + sysctl_local_port_range[1] = range[1]; + write_sequnlock(&sysctl_port_range_lock); +} + +static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, + 
void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret; + int range[2] = { sysctl_local_port_range[0], + sysctl_local_port_range[1] }; + ctl_table tmp = { + .data = &range, + .maxlen = sizeof(range), + .mode = table->mode, + .extra1 = &local_min_port, + .extra2 = &local_max_port, + }; + + ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); + + if (write && ret == 0) { + if (range[1] <= range[0]) + ret = -EINVAL; + else + set_local_port_range(range); + } + + return ret; +} + +static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, + int nlen, void __user *oldval, + size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + int ret; + int range[2] = { sysctl_local_port_range[0], + sysctl_local_port_range[1] }; + ctl_table tmp = { + .data = &range, + .maxlen = sizeof(range), + .mode = table->mode, + .extra1 = &local_min_port, + .extra2 = &local_max_port, + }; + + ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); + if (ret == 0 && newval && newlen) { + if (range[1] <= range[0]) + ret = -EINVAL; + else + set_local_port_range(range); + } + return ret; +} + + static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -427,10 +494,8 @@ ctl_table ipv4_table[] = { .data = &sysctl_local_port_range, .maxlen = sizeof(sysctl_local_port_range), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = ip_local_port_range_min, - .extra2 = ip_local_port_range_max + .proc_handler = &ipv4_local_port_range, + .strategy = &ipv4_sysctl_local_port_range, }, { .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL, --- a/net/ipv4/tcp_ipv4.c 2007-10-10 08:27:00.000000000 -0700 +++ b/net/ipv4/tcp_ipv4.c 2007-10-10 09:41:16.000000000 -0700 @@ -2470,6 +2470,5 @@ EXPORT_SYMBOL(tcp_v4_syn_recv_sock); EXPORT_SYMBOL(tcp_proc_register); EXPORT_SYMBOL(tcp_proc_unregister); #endif -EXPORT_SYMBOL(sysctl_local_port_range); 
EXPORT_SYMBOL(sysctl_tcp_low_latency); --- a/net/ipv4/udp.c 2007-10-10 08:27:00.000000000 -0700 +++ b/net/ipv4/udp.c 2007-10-10 09:44:35.000000000 -0700 @@ -147,13 +147,13 @@ int __udp_lib_get_port(struct sock *sk, write_lock_bh(&udp_hash_lock); if (!snum) { - int i; - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; + int i, range[2]; unsigned rover, best, best_size_so_far; + inet_get_local_port_range(range); + best_size_so_far = UINT_MAX; - best = rover = net_random() % (high - low) + low; + best = rover = net_random() % (range[1] - range[0]) + range[0]; /* 1st pass: look for empty (or shortest) hash chain */ for (i = 0; i < UDP_HTABLE_SIZE; i++) { @@ -171,11 +171,9 @@ int __udp_lib_get_port(struct sock *sk, best = rover; next: /* fold back if end of range */ - if (++rover > high) - rover = low + ((rover - low) + if (++rover > range[1]) + rover = range[0] + ((rover - range[0]) & (UDP_HTABLE_SIZE - 1)); - - } /* 2nd pass: find hole in shortest hash chain */ @@ -184,8 +182,8 @@ int __udp_lib_get_port(struct sock *sk, if (! 
__udp_lib_lport_inuse(rover, udptable)) goto gotit; rover += UDP_HTABLE_SIZE; - if (rover > high) - rover = low + ((rover - low) + if (rover > range[1]) + rover = range[0] + ((rover - range[0]) & (UDP_HTABLE_SIZE - 1)); } --- a/net/ipv6/inet6_hashtables.c 2007-10-10 08:27:00.000000000 -0700 +++ b/net/ipv6/inet6_hashtables.c 2007-10-10 09:39:48.000000000 -0700 @@ -254,18 +254,19 @@ int inet6_hash_connect(struct inet_timew int ret; if (snum == 0) { - const int low = sysctl_local_port_range[0]; - const int high = sysctl_local_port_range[1]; - const int range = high - low; - int i, port; + int range[2]; + int i, port, count; static u32 hint; const u32 offset = hint + inet6_sk_port_offset(sk); struct hlist_node *node; struct inet_timewait_sock *tw = NULL; + inet_get_local_port_range(range); + count = range[1] - range[0]; + local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; + for (i = 1; i <= count; i++) { + port = range[0] + (i + offset) % count; head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; spin_lock(&head->lock); --- a/security/selinux/hooks.c 2007-10-10 08:27:01.000000000 -0700 +++ b/security/selinux/hooks.c 2007-10-10 09:50:09.000000000 -0700 @@ -3232,8 +3232,6 @@ static int selinux_socket_post_create(st /* Range of port numbers used to automatically bind. Need to determine whether we should perform a name_bind permission check between the socket and the port number. 
*/ -#define ip_local_port_range_0 sysctl_local_port_range[0] -#define ip_local_port_range_1 sysctl_local_port_range[1] static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { @@ -3276,20 +3274,27 @@ static int selinux_socket_bind(struct so addrp = (char *)&addr6->sin6_addr.s6_addr; } - if (snum&&(snum < max(PROT_SOCK,ip_local_port_range_0) || - snum > ip_local_port_range_1)) { - err = security_port_sid(sk->sk_family, sk->sk_type, - sk->sk_protocol, snum, &sid); - if (err) - goto out; - AVC_AUDIT_DATA_INIT(&ad,NET); - ad.u.net.sport = htons(snum); - ad.u.net.family = family; - err = avc_has_perm(isec->sid, sid, - isec->sclass, - SOCKET__NAME_BIND, &ad); - if (err) - goto out; + if (snum) { + int range[2]; + + inet_get_local_port_range(range); + + if (snum < max(PROT_SOCK, range[0]) || snum > range[1]) { + err = security_port_sid(sk->sk_family, + sk->sk_type, + sk->sk_protocol, snum, + &sid); + if (err) + goto out; + AVC_AUDIT_DATA_INIT(&ad,NET); + ad.u.net.sport = htons(snum); + ad.u.net.family = family; + err = avc_has_perm(isec->sid, sid, + isec->sclass, + SOCKET__NAME_BIND, &ad); + if (err) + goto out; + } } switch(isec->sclass) { --- a/drivers/infiniband/core/cma.c 2007-10-10 08:26:39.000000000 -0700 +++ b/drivers/infiniband/core/cma.c 2007-10-10 10:01:10.000000000 -0700 @@ -1866,13 +1866,14 @@ err1: static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list; - int port, ret; + int port, ret, range[2]; bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); if (!bind_list) return -ENOMEM; retry: + /* FIXME: add proper port randomization */ do { ret = idr_get_new_above(ps, bind_list, next_port, &port); } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL)); @@ -1880,18 +1881,20 @@ retry: if (ret) goto err1; - if (port > sysctl_local_port_range[1]) { - if (next_port != sysctl_local_port_range[0]) { + inet_get_local_port_range(range); + + if (port > range[1]) { + if 
(next_port != range[0]) { idr_remove(ps, port); - next_port = sysctl_local_port_range[0]; + next_port = range[0]; goto retry; } ret = -EADDRNOTAVAIL; goto err2; } - if (port == sysctl_local_port_range[1]) - next_port = sysctl_local_port_range[0]; + if (port == range[1]) + next_port = range[0]; else next_port = port + 1; @@ -2769,12 +2772,11 @@ static void cma_remove_one(struct ib_dev static int cma_init(void) { - int ret; + int ret, range[2]; + + inet_get_local_port_range(range); + next_port = net_random() % (range[1] - range[0]) + range[0]; - get_random_bytes(&next_port, sizeof next_port); - next_port = ((unsigned int) next_port % - (sysctl_local_port_range[1] - sysctl_local_port_range[0])) + - sysctl_local_port_range[0]; cma_wq = create_singlethread_workqueue("rdma_cm"); if (!cma_wq) return -ENOMEM; --- a/net/sctp/protocol.c 2007-10-10 08:27:00.000000000 -0700 +++ b/net/sctp/protocol.c 2007-10-10 09:58:21.000000000 -0700 @@ -1173,7 +1173,6 @@ SCTP_STATIC __init int sctp_init(void) } spin_lock_init(&sctp_port_alloc_lock); - sctp_port_rover = sysctl_local_port_range[0] - 1; printk(KERN_INFO "SCTP: Hash tables configured " "(established %d bind %d)\n", --- a/net/sctp/socket.c 2007-10-10 08:27:00.000000000 -0700 +++ b/net/sctp/socket.c 2007-10-10 10:01:42.000000000 -0700 @@ -5314,26 +5314,19 @@ static long sctp_get_port_local(struct s sctp_local_bh_disable(); if (snum == 0) { - /* Search for an available port. - * - * 'sctp_port_rover' was the last port assigned, so - * we start to search from 'sctp_port_rover + - * 1'. What we do is first check if port 'rover' is - * already in the hash table; if not, we use that; if - * it is, we try next. - */ - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover; - int index; + /* Search for an available port. 
*/ + int index, rover, remaining, range[2]; + + inet_get_local_port_range(range); + remaining = range[1] - range[0]; + rover = net_random() % remaining + range[0]; sctp_spin_lock(&sctp_port_alloc_lock); - rover = sctp_port_rover; do { rover++; - if ((rover < low) || (rover > high)) - rover = low; + if ((rover < range[0]) || (rover > range[1])) + rover = range[0]; + index = sctp_phashfn(rover); head = &sctp_port_hashtable[index]; sctp_spin_lock(&head->lock); @@ -5344,7 +5337,6 @@ static long sctp_get_port_local(struct s next: sctp_spin_unlock(&head->lock); } while (--remaining > 0); - sctp_port_rover = rover; sctp_spin_unlock(&sctp_port_alloc_lock); /* Exhausted local port range during search? */ - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html