From: "David S. Miller" <[EMAIL PROTECTED]> Date: Thu, 01 Dec 2005 18:51:19 -0800 (PST)
> From: Stephen Hemminger <[EMAIL PROTECTED]> > Date: Thu, 1 Dec 2005 10:51:55 -0800 > > > Detail of issue is old: > > > > http://oss.sgi.com/archives/netdev/2004-11/msg00573.html > > > > Once accept fails on EMFILE/ENFILE you cannot get the socket even if you > > free up > > fd's and retry... > > I want to apply something like what is in that netdev posting. It > does make accept() more robust, but there is still a hole. ... > If we take an -EFAULT at the end of sys_accept(), we'll lose the > socket in that case as well, even with the above mentioned fix. > But we have to fully complete the ->accept() call before doing > the write back to userspace. I haven't found a way to solve the -EFAULT problem, but we should fix the ENFILE/EMFILE issue since we can, this has sat on the backburner long enough :-) So I just the following into the net-2.6.17 tree. diff-tree dc326c4936f41911046b2dc72cbe04053e9680d6 (from d52610bd92bb915a741098cc9b7fa23a5629fc10) Author: David S. Miller <[EMAIL PROTECTED]> Date: Thu Jan 26 20:45:15 2006 -0800 [NET]: Do not lose accepted socket when -ENFILE/-EMFILE. Try to allocate the struct file and an unused file descriptor before we try to pull a newly accepted socket out of the protocol layer. Based upon a patch by Prassana Meda. Signed-off-by: David S. Miller <[EMAIL PROTECTED]> diff --git a/net/socket.c b/net/socket.c index b38a263..8d4032a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -351,8 +351,8 @@ static struct dentry_operations sockfs_d /* * Obtains the first available file descriptor and sets it up for use. * - * This function creates file structure and maps it to fd space - * of current process. On success it returns file descriptor + * These functions create file structures and maps them to fd space + * of the current process. On success it returns file descriptor * and file struct implicitly stored in sock->file. * Note that another thread may close file descriptor before we return * from this function. We use the fact that now we do not refer @@ -365,52 +365,67 @@ static struct dentry_operations sockfs_d * but we take care of internal coherence yet. */ -int sock_map_fd(struct socket *sock) +static int sock_alloc_fd(struct file **filep) { int fd; - struct qstr this; - char name[32]; - - /* - * Find a file descriptor suitable for return to the user. - */ fd = get_unused_fd(); - if (fd >= 0) { + if (likely(fd >= 0)) { struct file *file = get_empty_filp(); - if (!file) { + if (unlikely(!file)) { put_unused_fd(fd); - fd = -ENFILE; - goto out; + return -ENFILE; } + *filep = file; + } else + *filep = NULL; + return fd; +} - this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino); - this.name = name; - this.hash = SOCK_INODE(sock)->i_ino; - - file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); - if (!file->f_dentry) { - put_filp(file); +static int sock_attach_fd(struct socket *sock, struct file *file) +{ + struct qstr this; + char name[32]; + + this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino); + this.name = name; + this.hash = SOCK_INODE(sock)->i_ino; + + file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); + if (unlikely(!file->f_dentry)) + return -ENOMEM; + + file->f_dentry->d_op = &sockfs_dentry_operations; + d_add(file->f_dentry, SOCK_INODE(sock)); + file->f_vfsmnt = mntget(sock_mnt); + file->f_mapping = file->f_dentry->d_inode->i_mapping; + + sock->file = file; + file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops; + file->f_mode = FMODE_READ | FMODE_WRITE; + file->f_flags = O_RDWR; + file->f_pos = 0; + file->private_data = sock; + + return 0; +} + +int sock_map_fd(struct socket *sock) +{ + struct file *newfile; + int fd = sock_alloc_fd(&newfile); + + if (likely(fd >= 0)) { + int err = sock_attach_fd(sock, newfile); + + if (unlikely(err < 0)) { + fput(newfile); put_unused_fd(fd); - fd = -ENOMEM; - goto out; + return err; } - file->f_dentry->d_op = &sockfs_dentry_operations; - d_add(file->f_dentry, SOCK_INODE(sock)); - file->f_vfsmnt = mntget(sock_mnt); - file->f_mapping = file->f_dentry->d_inode->i_mapping; - - sock->file = file; - file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops; - file->f_mode = FMODE_READ | FMODE_WRITE; - file->f_flags = O_RDWR; - file->f_pos = 0; - file->private_data = sock; - fd_install(fd, file); + fd_install(fd, newfile); } - -out: return fd; } @@ -1352,7 +1367,8 @@ asmlinkage long sys_listen(int fd, int b asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) { struct socket *sock, *newsock; - int err, len; + struct file *newfile; + int err, len, newfd; char address[MAX_SOCK_ADDR]; sock = sockfd_lookup(fd, &err); @@ -1372,28 +1388,38 @@ asmlinkage long sys_accept(int fd, struc */ __module_get(newsock->ops->owner); + newfd = sock_alloc_fd(&newfile); + if (newfd < 0) { + err = newfd; + goto out_release; + } + + err = sock_attach_fd(sock, newfile); + if (err < 0) + goto out_fd; + err = security_socket_accept(sock, newsock); if (err) - goto out_release; + goto out_fd; err = sock->ops->accept(sock, newsock, sock->file->f_flags); if (err < 0) - goto out_release; + goto out_fd; if (upeer_sockaddr) { if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) { err = -ECONNABORTED; - goto out_release; + goto out_fd; } err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen); if (err < 0) - goto out_release; + goto out_fd; } /* File flags are not inherited via accept() unlike another OSes. */ - if ((err = sock_map_fd(newsock)) < 0) - goto out_release; + fd_install(newfd, newfile); + err = newfd; security_socket_post_accept(sock, newsock); @@ -1401,6 +1427,9 @@ out_put: sockfd_put(sock); out: return err; +out_fd: + fput(newfile); + put_unused_fd(newfd); out_release: sock_release(newsock); goto out_put; - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html