This makes it safe to deference task->files with just rcu_read_lock
held, removing the need to take task_lock.

Signed-off-by: Eric W. Biederman <ebied...@xmission.com>
---

I have cleaned up my patch a little more and made this a little
more explicitly rcu.  I took a completley different approach to
documenting the use of rcu_head than we described that seems a little
more robust.

 fs/file.c               | 53 ++++++++++++++++++++++++++---------------
 include/linux/fdtable.h |  1 +
 kernel/fork.c           |  4 ++--
 3 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index 412033d8cfdf..88064f185560 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -379,7 +379,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, 
unsigned int max_fds, int
        return NULL;
 }
 
-static struct fdtable *close_files(struct files_struct * files)
+static void close_files(struct files_struct * files)
 {
        /*
         * It is safe to dereference the fd table without RCU or
@@ -397,8 +397,9 @@ static struct fdtable *close_files(struct files_struct * 
files)
                set = fdt->open_fds[j++];
                while (set) {
                        if (set & 1) {
-                               struct file * file = xchg(&fdt->fd[i], NULL);
+                               struct file * file = fdt->fd[i];
                                if (file) {
+                                       rcu_assign_pointer(fdt->fd[i], NULL);
                                        filp_close(file, files);
                                        cond_resched();
                                }
@@ -407,19 +408,35 @@ static struct fdtable *close_files(struct files_struct * 
files)
                        set >>= 1;
                }
        }
+}
 
-       return fdt;
+static void free_files_struct_rcu(struct rcu_head *rcu)
+{
+       struct files_struct *files =
+               container_of(rcu, struct files_struct, fdtab.rcu);
+       struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+
+       /* free the arrays if they are not embedded */
+       if (fdt != &files->fdtab)
+               __free_fdtable(fdt);
+       kmem_cache_free(files_cachep, files);
 }
 
 void put_files_struct(struct files_struct *files)
 {
        if (atomic_dec_and_test(&files->count)) {
-               struct fdtable *fdt = close_files(files);
-
-               /* free the arrays if they are not embedded */
-               if (fdt != &files->fdtab)
-                       __free_fdtable(fdt);
-               kmem_cache_free(files_cachep, files);
+               close_files(files);
+               /*
+                * The rules for using the rcu_head in fdtable:
+                *
+                * If the fdtable is being freed independently
+                * of the files_struct fdtable->rcu is used.
+                *
+                * When the fdtable and files_struct are freed
+                * together the rcu_head from the fdtable embedded in
+                * files_struct is used.
+                */
+               call_rcu(&files->fdtab.rcu, free_files_struct_rcu);
        }
 }
 
@@ -822,12 +839,14 @@ EXPORT_SYMBOL(fget_raw);
 
 struct file *fget_task(struct task_struct *task, unsigned int fd)
 {
+       struct files_struct *files;
        struct file *file = NULL;
 
-       task_lock(task);
-       if (task->files)
-               file = __fget_files(task->files, fd, 0, 1);
-       task_unlock(task);
+       rcu_read_lock();
+       files = rcu_dereference(task->files);
+       if (files)
+               file = __fget_files(files, fd, 0, 1);
+       rcu_read_unlock();
 
        return file;
 }
@@ -838,11 +857,9 @@ struct file *task_lookup_fd_rcu(struct task_struct *task, 
unsigned int fd)
        struct files_struct *files;
        struct file *file = NULL;
 
-       task_lock(task);
-       files = task->files;
+       files = rcu_dereference(task->files);
        if (files)
                file = files_lookup_fd_rcu(files, fd);
-       task_unlock(task);
 
        return file;
 }
@@ -854,8 +871,7 @@ struct file *task_lookup_next_fd_rcu(struct task_struct 
*task, unsigned int *ret
        unsigned int fd = *ret_fd;
        struct file *file = NULL;
 
-       task_lock(task);
-       files = task->files;
+       files = rcu_dereference(task->files);
        if (files) {
                for (; fd < files_fdtable(files)->max_fds; fd++) {
                        file = files_lookup_fd_rcu(files, fd);
@@ -863,7 +879,6 @@ struct file *task_lookup_next_fd_rcu(struct task_struct 
*task, unsigned int *ret
                                break;
                }
        }
-       task_unlock(task);
        *ret_fd = fd;
        return file;
 }
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index d0e78174874a..37dcface020f 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -30,6 +30,7 @@ struct fdtable {
        unsigned long *close_on_exec;
        unsigned long *open_fds;
        unsigned long *full_fds_bits;
+       /* Used for freeing fdtable and any containing files_struct */
        struct rcu_head rcu;
 };
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d0ae6f827df..eca474a1586a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2982,7 +2982,7 @@ int ksys_unshare(unsigned long unshare_flags)
 
                if (new_fd) {
                        fd = current->files;
-                       current->files = new_fd;
+                       rcu_assign_pointer(current->files, new_fd);
                        new_fd = fd;
                }
 
@@ -3035,7 +3035,7 @@ int unshare_files(void)
 
        old = task->files;
        task_lock(task);
-       task->files = copy;
+       rcu_assign_pointer(task->files, copy);
        task_unlock(task);
        put_files_struct(old);
        return 0;
-- 
2.20.1

Reply via email to