The patch below reworks socket.c to use fget_light() in place of fget() for the various networking syscalls. This is of particular value with SMP kernels running on the P4 as the memory barriers the atomic ops on the file counter causes result in the CPU having to wait for quite a few memory transactions to complete (we can have up to ~128 instructions in flight).
On the default netperf (which uses fairly large buffers) the increase is around 25Mbit/s (on 8000Mbit/s). Using 'netperf -- -m 64' performance goes to ~284Mbit/s from 247Mbit/s, which is pretty substantial. -ben Signed-off-by: Benjamin LaHaise <benjamin.c.lahaise> diff --git a/net/socket.c b/net/socket.c index a00851f..64019f8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -414,6 +414,28 @@ out: return fd; } +static struct socket *sock_from_file(struct file *file, int *err) +{ + struct inode *inode; + struct socket *sock; + + if (file->f_op == &socket_file_ops) + return file->private_data; /* set in sock_map_fd */ + + inode = file->f_dentry->d_inode; + if (!S_ISSOCK(inode->i_mode)) { + *err = -ENOTSOCK; + return NULL; + } + + sock = SOCKET_I(inode); + if (sock->file != file) { + printk(KERN_ERR "socki_lookup: socket file changed!\n"); + sock->file = file; + } + return sock; +} + /** * sockfd_lookup - Go from a file number to its socket slot * @fd: file handle @@ -430,31 +452,31 @@ out: struct socket *sockfd_lookup(int fd, int *err) { struct file *file; - struct inode *inode; struct socket *sock; - if (!(file = fget(fd))) - { + if (!(file = fget(fd))) { *err = -EBADF; return NULL; } - - if (file->f_op == &socket_file_ops) - return file->private_data; /* set in sock_map_fd */ - - inode = file->f_dentry->d_inode; - if (!S_ISSOCK(inode->i_mode)) { - *err = -ENOTSOCK; + sock = sock_from_file(file, err); + if (!sock) fput(file); - return NULL; - } + return sock; +} - sock = SOCKET_I(inode); - if (sock->file != file) { - printk(KERN_ERR "socki_lookup: socket file changed!\n"); - sock->file = file; +static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) +{ + struct file *file; + struct socket *sock; + + file = fget_light(fd, fput_needed); + if (file) { + sock = sock_from_file(file, err); + if (sock) + return sock; + fput_light(file, *fput_needed); } - return sock; + return NULL; } /** @@ -1289,19 +1311,17 @@ asmlinkage long sys_bind(int fd, struct { struct socket *sock; char address[MAX_SOCK_ADDR]; - int err; + int err, fput_needed; - if((sock = sockfd_lookup(fd,&err))!=NULL) + if((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL) { if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) { err = security_socket_bind(sock, (struct sockaddr *)address, addrlen); - if (err) { - sockfd_put(sock); - return err; - } - err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen); + if (!err) + err = sock->ops->bind(sock, + (struct sockaddr *)address, addrlen); } - sockfd_put(sock); + fput_light(sock->file, fput_needed); } return err; } @@ -1318,20 +1338,17 @@ int sysctl_somaxconn = SOMAXCONN; asmlinkage long sys_listen(int fd, int backlog) { struct socket *sock; - int err; + int err, fput_needed; - if ((sock = sockfd_lookup(fd, &err)) != NULL) { + if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { if ((unsigned) backlog > sysctl_somaxconn) backlog = sysctl_somaxconn; err = security_socket_listen(sock, backlog); - if (err) { - sockfd_put(sock); - return err; - } + if (!err) + err = sock->ops->listen(sock, backlog); - err=sock->ops->listen(sock, backlog); - sockfd_put(sock); + fput_light(sock->file, fput_needed); } return err; } @@ -1352,10 +1369,10 @@ asmlinkage long sys_listen(int fd, int b asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) { struct socket *sock, *newsock; - int err, len; + int err, len, fput_needed; char address[MAX_SOCK_ADDR]; - sock = sockfd_lookup(fd, &err); + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1398,7 +1415,7 @@ asmlinkage long sys_accept(int fd, struc security_socket_post_accept(sock, newsock); out_put: - sockfd_put(sock); + fput_light(sock->file, fput_needed); out: return err; out_release: @@ -1423,9 +1440,9 @@ asmlinkage long sys_connect(int fd, stru { struct socket *sock; char address[MAX_SOCK_ADDR]; - int err; + int err, fput_needed; - sock = sockfd_lookup(fd, &err); + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; err = move_addr_to_kernel(uservaddr, addrlen, address); @@ -1439,7 +1456,7 @@ asmlinkage long sys_connect(int fd, stru err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen, sock->file->f_flags); out_put: - sockfd_put(sock); + fput_light(sock->file, fput_needed); out: return err; } @@ -1453,9 +1470,9 @@ asmlinkage long sys_getsockname(int fd, { struct socket *sock; char address[MAX_SOCK_ADDR]; - int len, err; + int len, err, fput_needed; - sock = sockfd_lookup(fd, &err); + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1469,7 +1486,7 @@ asmlinkage long sys_getsockname(int fd, err = move_addr_to_user(address, len, usockaddr, usockaddr_len); out_put: - sockfd_put(sock); + fput_light(sock->file, fput_needed); out: return err; } @@ -1483,20 +1500,19 @@ asmlinkage long sys_getpeername(int fd, { struct socket *sock; char address[MAX_SOCK_ADDR]; - int len, err; + int len, err, fput_needed; - if ((sock = sockfd_lookup(fd, &err))!=NULL) - { + if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { err = security_socket_getpeername(sock); if (err) { - sockfd_put(sock); + fput_light(sock->file, fput_needed); return err; } err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1); if (!err) err=move_addr_to_user(address,len, usockaddr, usockaddr_len); - sockfd_put(sock); + fput_light(sock->file, fput_needed); } return err; } @@ -1515,10 +1531,16 @@ asmlinkage long sys_sendto(int fd, void int err; struct msghdr msg; struct iovec iov; - - sock = sockfd_lookup(fd, &err); + int fput_needed; + struct file *sock_file; + + sock_file = fget_light(fd, &fput_needed); + if (!sock_file) + return -EBADF; + + sock = sock_from_file(sock_file, &err); if (!sock) - goto out; + goto out_put; iov.iov_base=buff; iov.iov_len=len; msg.msg_name=NULL; @@ -1527,8 +1549,7 @@ asmlinkage long sys_sendto(int fd, void msg.msg_control=NULL; msg.msg_controllen=0; msg.msg_namelen=0; - if(addr) - { + if (addr) { err = move_addr_to_kernel(addr, addr_len, address); if (err < 0) goto out_put; @@ -1541,8 +1562,7 @@ asmlinkage long sys_sendto(int fd, void err = sock_sendmsg(sock, &msg, len); out_put: - sockfd_put(sock); -out: + fput_light(sock_file, fput_needed); return err; } @@ -1569,8 +1589,14 @@ asmlinkage long sys_recvfrom(int fd, voi struct msghdr msg; char address[MAX_SOCK_ADDR]; int err,err2; + struct file *sock_file; + int fput_needed; + + sock_file = fget_light(fd, &fput_needed); + if (!sock_file) + return -EBADF; - sock = sockfd_lookup(fd, &err); + sock = sock_from_file(sock_file, &err); if (!sock) goto out; @@ -1592,8 +1618,8 @@ asmlinkage long sys_recvfrom(int fd, voi if(err2<0) err=err2; } - sockfd_put(sock); out: + fput_light(sock_file, fput_needed); return err; } @@ -1613,25 +1639,24 @@ asmlinkage long sys_recv(int fd, void __ asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen) { - int err; + int err, fput_needed; struct socket *sock; if (optlen < 0) return -EINVAL; - if ((sock = sockfd_lookup(fd, &err))!=NULL) + if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { err = security_socket_setsockopt(sock,level,optname); - if (err) { - sockfd_put(sock); - return err; - } + if (err) + goto out_put; if (level == SOL_SOCKET) err=sock_setsockopt(sock,level,optname,optval,optlen); else err=sock->ops->setsockopt(sock, level, optname, optval, optlen); - sockfd_put(sock); +out_put: + fput_light(sock->file, fput_needed); } return err; } @@ -1643,23 +1668,20 @@ asmlinkage long sys_setsockopt(int fd, i asmlinkage long sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) { - int err; + int err, fput_needed; struct socket *sock; - if ((sock = sockfd_lookup(fd, &err))!=NULL) - { - err = security_socket_getsockopt(sock, level, - optname); - if (err) { - sockfd_put(sock); - return err; - } + if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { + err = security_socket_getsockopt(sock, level, optname); + if (err) + goto out_put; if (level == SOL_SOCKET) err=sock_getsockopt(sock,level,optname,optval,optlen); else err=sock->ops->getsockopt(sock, level, optname, optval, optlen); - sockfd_put(sock); +out_put: + fput_light(sock->file, fput_needed); } return err; } @@ -1671,19 +1693,15 @@ asmlinkage long sys_getsockopt(int fd, i asmlinkage long sys_shutdown(int fd, int how) { - int err; + int err, fput_needed; struct socket *sock; - if ((sock = sockfd_lookup(fd, &err))!=NULL) + if ((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL) { err = security_socket_shutdown(sock, how); - if (err) { - sockfd_put(sock); - return err; - } - - err=sock->ops->shutdown(sock, how); - sockfd_put(sock); + if (!err) + err = sock->ops->shutdown(sock, how); + fput_light(sock->file, fput_needed); } return err; } @@ -1712,6 +1730,7 @@ asmlinkage long sys_sendmsg(int fd, stru unsigned char *ctl_buf = ctl; struct msghdr msg_sys; int err, ctl_len, iov_size, total_len; + int fput_needed; err = -EFAULT; if (MSG_CMSG_COMPAT & flags) { @@ -1720,7 +1739,7 @@ asmlinkage long sys_sendmsg(int fd, stru } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; - sock = sockfd_lookup(fd, &err); + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1788,7 +1807,7 @@ out_freeiov: if (iov != iovstack) sock_kfree_s(sock->sk, iov, iov_size); out_put: - sockfd_put(sock); + fput_light(sock->file, fput_needed); out: return err; } @@ -1806,6 +1825,7 @@ asmlinkage long sys_recvmsg(int fd, stru struct msghdr msg_sys; unsigned long cmsg_ptr; int err, iov_size, total_len, len; + int fput_needed; /* kernel mode address */ char addr[MAX_SOCK_ADDR]; @@ -1821,7 +1841,7 @@ asmlinkage long sys_recvmsg(int fd, stru if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr))) return -EFAULT; - sock = sockfd_lookup(fd, &err); + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1888,7 +1908,7 @@ out_freeiov: if (iov != iovstack) sock_kfree_s(sock->sk, iov, iov_size); out_put: - sockfd_put(sock); + fput_light(sock->file, fput_needed); out: return err; } - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html