From: Kirill Tkhai <ktk...@virtuozzo.com>

Patchset description:
Shrink big fdtable on criu restore

This patchset avoids the memory overuse caused by service fds on criu
restore.
The solution is simple: check the number of the fd being closed, and shrink the
fdtable when that is possible. The checks happen only in is_pseudosuper mode, so
performance in the normal work mode is not affected.

The problem is that this can't be solved for 100% of cases in userspace.
Doing it in the kernel allows fixing it completely.

https://jira.sw.ru/browse/PSBM-78827

Eric Dumazet (1):
      ms/fs/file.c: don't acquire files->file_lock in fd_install()

Kirill Tkhai (3):
      files: Add new argument to expand_files()
      files: Add fdtable_align() helper
      files: Shrink big fdtable on close in is_pseudosuper mode

Mateusz Guzik (1):
      ms/vfs: grab the lock instead of blocking in __fd_install during resizing

============================================================
This patch description:

This trick is going to be used for criu restore, to release excess memory
occupied by service files:
we check the fd being closed, and if it is at least half of the fdtable's
maximum fd number, we try to shrink the fdtable and decrease the amount of
memory needed to store the task's fds.

Currently is_pseudosuper state is used to detect restore, but it can be changed
later if needed.

Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com>
Reviewed-by: Cyrill Gorcunov <gorcu...@openvz.org>

Rebase to vz8:
 - Used the commit rebased to RH7.9 (4b024fd120c5cfc3775387e2ed2e29d389a42849),
which handles the new copy_fd_bitmaps helper function.

(cherry picked from e4a319f998910317ce1559acebecca365f85d8ba)
Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>
---
 fs/file.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 48 insertions(+), 15 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index 4f68ef0f..0ed1c4c 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -18,6 +18,7 @@
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
 #include <linux/rcupdate.h>
+#include <linux/ve.h>
 
 unsigned int sysctl_nr_open __read_mostly = 1024*1024;
 unsigned int sysctl_nr_open_min = BITS_PER_LONG;
@@ -47,21 +48,25 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
  * spinlock held for write.
  */
 static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
-                           unsigned int count)
+                           unsigned int count, bool shrink)
 {
        unsigned int cpy, set;
 
-       cpy = count / BITS_PER_BYTE;
+       cpy = min(count, nfdt->max_fds) / BITS_PER_BYTE;
        set = (nfdt->max_fds - count) / BITS_PER_BYTE;
        memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
-       memset((char *)nfdt->open_fds + cpy, 0, set);
+       if (!shrink)
+               memset((char *)nfdt->open_fds + cpy, 0, set);
+
        memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
-       memset((char *)nfdt->close_on_exec + cpy, 0, set);
+       if (!shrink)
+               memset((char *)nfdt->close_on_exec + cpy, 0, set);
 
-       cpy = BITBIT_SIZE(count);
+       cpy = BITBIT_SIZE(min(count, nfdt->max_fds));
        set = BITBIT_SIZE(nfdt->max_fds) - cpy;
        memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy);
-       memset((char *)nfdt->full_fds_bits + cpy, 0, set);
+       if (!shrink)
+               memset((char *)nfdt->full_fds_bits + cpy, 0, set);
 }
 
 /*
@@ -72,14 +77,15 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt, bool shrink
 {
        unsigned int cpy, set;
 
-       BUG_ON(nfdt->max_fds < ofdt->max_fds);
+       BUG_ON((nfdt->max_fds < ofdt->max_fds) != shrink);
 
-       cpy = ofdt->max_fds * sizeof(struct file *);
+       cpy = min(ofdt->max_fds, nfdt->max_fds) * sizeof(struct file *);
        set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
        memcpy(nfdt->fd, ofdt->fd, cpy);
-       memset((char *)nfdt->fd + cpy, 0, set);
+       if (!shrink)
+               memset((char *)nfdt->fd + cpy, 0, set);
 
-       copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
+       copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds, shrink);
 }
 
 static unsigned int fdtable_align(unsigned int nr)
@@ -170,16 +176,26 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr, bool shri
        spin_lock(&files->file_lock);
        if (!new_fdt)
                return -ENOMEM;
+       cur_fdt = files_fdtable(files);
        /*
         * extremely unlikely race - sysctl_nr_open decreased between the check in
         * caller and alloc_fdtable().  Cheaper to catch it here...
         */
-       if (unlikely(new_fdt->max_fds <= nr)) {
+       if (unlikely((new_fdt->max_fds <= nr && !shrink) ||
+                    (shrink && new_fdt->max_fds >= cur_fdt->max_fds))) {
                __free_fdtable(new_fdt);
                return -EMFILE;
        }
-       cur_fdt = files_fdtable(files);
-       BUG_ON(nr < cur_fdt->max_fds);
+       if (unlikely(shrink)) {
+               int i;
+               i = find_last_bit(cur_fdt->open_fds, cur_fdt->max_fds);
+               i = fdtable_align(i);
+               if (i == cur_fdt->max_fds) {
+                       __free_fdtable(new_fdt);
+                       return 1;
+               }
+       }
+       BUG_ON((nr < cur_fdt->max_fds) != shrink);
        copy_fdtable(new_fdt, cur_fdt, shrink);
        rcu_assign_pointer(files->fdt, new_fdt);
        if (cur_fdt != &files->fdtab)
@@ -208,7 +224,7 @@ static int expand_files(struct files_struct *files, unsigned int nr, bool shrink
        fdt = files_fdtable(files);
 
        /* Do we need to expand? */
-       if (nr < fdt->max_fds)
+       if (nr < fdt->max_fds && !shrink)
                return expanded;
 
        /* Can we expand? */
@@ -223,6 +239,15 @@ static int expand_files(struct files_struct *files, unsigned int nr, bool shrink
                goto repeat;
        }
 
+       if (unlikely(shrink)) {
+               unsigned int i;
+               i = find_last_bit(fdt->open_fds, fdt->max_fds);
+               nr = i;
+               i = fdtable_align(i);
+               if (i >= fdt->max_fds)
+                       return expanded;
+       }
+
        /* All good, so we try */
        files->resize_in_progress = true;
        expanded = expand_fdtable(files, nr, shrink);
@@ -337,7 +362,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
                open_files = count_open_files(old_fdt);
        }
 
-       copy_fd_bitmaps(new_fdt, old_fdt, open_files);
+       copy_fd_bitmaps(new_fdt, old_fdt, open_files, false);
 
        old_fds = old_fdt->fd;
        new_fds = new_fdt->fd;
@@ -643,6 +668,14 @@ int __close_fd(struct files_struct *files, unsigned fd)
                goto out_unlock;
        rcu_assign_pointer(fdt->fd[fd], NULL);
        __put_unused_fd(files, fd);
+
+       /* Try to shrink fdt and to free memory */
+       if (unlikely(fd * 2 >= fdt->max_fds &&
+                    fd > (1024 / sizeof(struct file *))) &&
+                    get_exec_env() != get_ve0() &&
+                    get_exec_env()->is_pseudosuper)
+               expand_files(files, fd, true);
+
        spin_unlock(&files->file_lock);
        return filp_close(file, files);
 
-- 
1.8.3.1

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to