From: "David S. Miller" <[EMAIL PROTECTED]>
Date: Thu, 01 Dec 2005 18:51:19 -0800 (PST)

> From: Stephen Hemminger <[EMAIL PROTECTED]>
> Date: Thu, 1 Dec 2005 10:51:55 -0800
> 
> > Detail of issue is old:
> > 
> > http://oss.sgi.com/archives/netdev/2004-11/msg00573.html
> > 
> > Once accept fails on EMFILE/ENFILE you cannot get the socket even if you 
> > free up
> > fd's and retry...
> 
> I want to apply something like what is in that netdev posting.  It
> does make accept() more robust, but there is still a hole.
 ...
> If we take an -EFAULT at the end of sys_accept(), we'll lose the
> socket in that case as well, even with the above mentioned fix.
> But we have to fully complete the ->accept() call before doing
> the write back to userspace.

I haven't found a way to solve the -EFAULT problem, but we should
fix the ENFILE/EMFILE issue since we can, this has sat on the
backburner long enough :-)

So I just the following into the net-2.6.17 tree.

diff-tree dc326c4936f41911046b2dc72cbe04053e9680d6 (from 
d52610bd92bb915a741098cc9b7fa23a5629fc10)
Author: David S. Miller <[EMAIL PROTECTED]>
Date:   Thu Jan 26 20:45:15 2006 -0800

    [NET]: Do not lose accepted socket when -ENFILE/-EMFILE.
    
    Try to allocate the struct file and an unused file
    descriptor before we try to pull a newly accepted
    socket out of the protocol layer.
    
    Based upon a patch by Prassana Meda.
    
    Signed-off-by: David S. Miller <[EMAIL PROTECTED]>

diff --git a/net/socket.c b/net/socket.c
index b38a263..8d4032a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -351,8 +351,8 @@ static struct dentry_operations sockfs_d
 /*
  *     Obtains the first available file descriptor and sets it up for use.
  *
- *     This function creates file structure and maps it to fd space
- *     of current process. On success it returns file descriptor
+ *     These functions create file structures and maps them to fd space
+ *     of the current process. On success it returns file descriptor
  *     and file struct implicitly stored in sock->file.
  *     Note that another thread may close file descriptor before we return
  *     from this function. We use the fact that now we do not refer
@@ -365,52 +365,67 @@ static struct dentry_operations sockfs_d
  *     but we take care of internal coherence yet.
  */
 
-int sock_map_fd(struct socket *sock)
+static int sock_alloc_fd(struct file **filep)
 {
        int fd;
-       struct qstr this;
-       char name[32];
-
-       /*
-        *      Find a file descriptor suitable for return to the user. 
-        */
 
        fd = get_unused_fd();
-       if (fd >= 0) {
+       if (likely(fd >= 0)) {
                struct file *file = get_empty_filp();
 
-               if (!file) {
+               if (unlikely(!file)) {
                        put_unused_fd(fd);
-                       fd = -ENFILE;
-                       goto out;
+                       return -ENFILE;
                }
+               *filep = file;
+       } else
+               *filep = NULL;
+       return fd;
+}
 
-               this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
-               this.name = name;
-               this.hash = SOCK_INODE(sock)->i_ino;
-
-               file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
-               if (!file->f_dentry) {
-                       put_filp(file);
+static int sock_attach_fd(struct socket *sock, struct file *file)
+{
+       struct qstr this;
+       char name[32];
+
+       this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
+       this.name = name;
+       this.hash = SOCK_INODE(sock)->i_ino;
+
+       file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
+       if (unlikely(!file->f_dentry))
+               return -ENOMEM;
+
+       file->f_dentry->d_op = &sockfs_dentry_operations;
+       d_add(file->f_dentry, SOCK_INODE(sock));
+       file->f_vfsmnt = mntget(sock_mnt);
+       file->f_mapping = file->f_dentry->d_inode->i_mapping;
+
+       sock->file = file;
+       file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
+       file->f_mode = FMODE_READ | FMODE_WRITE;
+       file->f_flags = O_RDWR;
+       file->f_pos = 0;
+       file->private_data = sock;
+
+       return 0;
+}
+
+int sock_map_fd(struct socket *sock)
+{
+       struct file *newfile;
+       int fd = sock_alloc_fd(&newfile);
+
+       if (likely(fd >= 0)) {
+               int err = sock_attach_fd(sock, newfile);
+
+               if (unlikely(err < 0)) {
+                       fput(newfile);
                        put_unused_fd(fd);
-                       fd = -ENOMEM;
-                       goto out;
+                       return err;
                }
-               file->f_dentry->d_op = &sockfs_dentry_operations;
-               d_add(file->f_dentry, SOCK_INODE(sock));
-               file->f_vfsmnt = mntget(sock_mnt);
-               file->f_mapping = file->f_dentry->d_inode->i_mapping;
-
-               sock->file = file;
-               file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
-               file->f_mode = FMODE_READ | FMODE_WRITE;
-               file->f_flags = O_RDWR;
-               file->f_pos = 0;
-               file->private_data = sock;
-               fd_install(fd, file);
+               fd_install(fd, newfile);
        }
-
-out:
        return fd;
 }
 
@@ -1352,7 +1367,8 @@ asmlinkage long sys_listen(int fd, int b
 asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int 
__user *upeer_addrlen)
 {
        struct socket *sock, *newsock;
-       int err, len;
+       struct file *newfile;
+       int err, len, newfd;
        char address[MAX_SOCK_ADDR];
 
        sock = sockfd_lookup(fd, &err);
@@ -1372,28 +1388,38 @@ asmlinkage long sys_accept(int fd, struc
         */
        __module_get(newsock->ops->owner);
 
+       newfd = sock_alloc_fd(&newfile);
+       if (newfd < 0) {
+               err = newfd;
+               goto out_release;
+       }
+
+       err = sock_attach_fd(sock, newfile);
+       if (err < 0)
+               goto out_fd;
+
        err = security_socket_accept(sock, newsock);
        if (err)
-               goto out_release;
+               goto out_fd;
 
        err = sock->ops->accept(sock, newsock, sock->file->f_flags);
        if (err < 0)
-               goto out_release;
+               goto out_fd;
 
        if (upeer_sockaddr) {
                if(newsock->ops->getname(newsock, (struct sockaddr *)address, 
&len, 2)<0) {
                        err = -ECONNABORTED;
-                       goto out_release;
+                       goto out_fd;
                }
                err = move_addr_to_user(address, len, upeer_sockaddr, 
upeer_addrlen);
                if (err < 0)
-                       goto out_release;
+                       goto out_fd;
        }
 
        /* File flags are not inherited via accept() unlike another OSes. */
 
-       if ((err = sock_map_fd(newsock)) < 0)
-               goto out_release;
+       fd_install(newfd, newfile);
+       err = newfd;
 
        security_socket_post_accept(sock, newsock);
 
@@ -1401,6 +1427,9 @@ out_put:
        sockfd_put(sock);
 out:
        return err;
+out_fd:
+       fput(newfile);
+       put_unused_fd(newfd);
 out_release:
        sock_release(newsock);
        goto out_put;
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to