From: Kirill Tkhai <ktk...@virtuozzo.com> Patchset description: Shrink big fdtable on criu restore
This patchset avoids the memory overuse introduced by service fds on criu restore. The solution is simple: check the number of the fd being closed, and shrink the fdtable when that is possible. The checks happen only in is_pseudosuper mode, so performance in the normal work mode is not affected. The problem is that this cannot be solved in 100% of cases from userspace, whereas the kernel allows fixing it completely. https://jira.sw.ru/browse/PSBM-78827 Eric Dumazet (1): ms/fs/file.c: don't acquire files->file_lock in fd_install() Kirill Tkhai (3): files: Add new argument to expand_files() files: Add fdtable_align() helper files: Shrink big fdtable on close in is_pseudosuper mode Mateusz Guzik (1): ms/vfs: grab the lock instead of blocking in __fd_install during resizing ============================================================ This patch description: This trick is going to be used for criu restore, to release excess memory occupied by service files: we check the fd being closed, and if it is at least half of the current fdtable size, we try to shrink the fdtable and decrease the amount of memory needed to store the task's fds. Currently the is_pseudosuper state is used to detect restore, but this can be changed later if needed. Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com> Reviewed-by: Cyrill Gorcunov <gorcu...@openvz.org> Rebase to vz8: - Used the vz7 commit 4b024fd120c5 ("ve/fs/files: Shrink big fdtable on close in is_pseudosuper mode") as rebased to RH7.9, which handles the new copy_fd_bitmaps helper function. (cherry picked from vz7 commit 4b024fd120c5 ("ve/fs/files: Shrink big fdtable on close in is_pseudosuper mode")) Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com> +++ fs: Fix race with old fdt parallel reuse We own files->resize_in_progress at this moment, but it is only observed by those who expand the fdt. If someone wants to use an fd below the old fdt's maximum number, they simply do not look at it. So, let's check the old maximum fd after the lock is acquired, in order to detect such parallel users. 
https://jira.sw.ru/browse/PSBM-82984 Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com> Acked-by: Cyrill Gorcunov <gorcu...@openvz.org> (cherry picked from vz7 commit 01eb18f336ef ("fs: Fix race with old fdt parallel reuse")) mFixes: c4a4fada18a "ve/fs/files: Shrink big fdtable on close in is_pseudosuper mode" Signed-off-by: Vasily Averin <v...@virtuozzo.com> (cherry picked from vz8 commit 50a72f1810df59c1aada1a5efba8fb052075693c) Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com> --- fs/file.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/fs/file.c b/fs/file.c index 2163301..fabb57d 100644 --- a/fs/file.c +++ b/fs/file.c @@ -21,6 +21,7 @@ #include <linux/rcupdate.h> #include <linux/close_range.h> #include <net/sock.h> +#include <linux/ve.h> #include "internal.h" @@ -52,21 +53,25 @@ static void free_fdtable_rcu(struct rcu_head *rcu) * spinlock held for write. */ static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, - unsigned int count) + unsigned int count, bool shrink) { unsigned int cpy, set; - cpy = count / BITS_PER_BYTE; + cpy = min(count, nfdt->max_fds) / BITS_PER_BYTE; set = (nfdt->max_fds - count) / BITS_PER_BYTE; memcpy(nfdt->open_fds, ofdt->open_fds, cpy); - memset((char *)nfdt->open_fds + cpy, 0, set); + if (!shrink) + memset((char *)nfdt->open_fds + cpy, 0, set); + memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); - memset((char *)nfdt->close_on_exec + cpy, 0, set); + if (!shrink) + memset((char *)nfdt->close_on_exec + cpy, 0, set); - cpy = BITBIT_SIZE(count); + cpy = BITBIT_SIZE(min(count, nfdt->max_fds)); set = BITBIT_SIZE(nfdt->max_fds) - cpy; memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy); - memset((char *)nfdt->full_fds_bits + cpy, 0, set); + if (!shrink) + memset((char *)nfdt->full_fds_bits + cpy, 0, set); } /* @@ -77,14 +82,15 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt, bool shrink { size_t 
cpy, set; - BUG_ON(nfdt->max_fds < ofdt->max_fds); + BUG_ON((nfdt->max_fds < ofdt->max_fds) != shrink); - cpy = ofdt->max_fds * sizeof(struct file *); + cpy = min(ofdt->max_fds, nfdt->max_fds) * sizeof(struct file *); set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); memcpy(nfdt->fd, ofdt->fd, cpy); - memset((char *)nfdt->fd + cpy, 0, set); + if (!shrink) + memset((char *)nfdt->fd + cpy, 0, set); - copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds); + copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds, shrink); } static unsigned int fdtable_align(unsigned int nr) @@ -175,16 +181,25 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr, bool shri spin_lock(&files->file_lock); if (!new_fdt) return -ENOMEM; + cur_fdt = files_fdtable(files); /* * extremely unlikely race - sysctl_nr_open decreased between the check in * caller and alloc_fdtable(). Cheaper to catch it here... */ - if (unlikely(new_fdt->max_fds <= nr)) { + if (unlikely((new_fdt->max_fds <= nr && !shrink) || + (shrink && new_fdt->max_fds >= cur_fdt->max_fds))) { __free_fdtable(new_fdt); return -EMFILE; } - cur_fdt = files_fdtable(files); - BUG_ON(nr < cur_fdt->max_fds); + if (unlikely(shrink)) { + int i; + i = find_last_bit(cur_fdt->open_fds, cur_fdt->max_fds); + if (i >= new_fdt->max_fds) { + __free_fdtable(new_fdt); + return 1; + } + } + BUG_ON((nr < cur_fdt->max_fds) != shrink); copy_fdtable(new_fdt, cur_fdt, shrink); rcu_assign_pointer(files->fdt, new_fdt); if (cur_fdt != &files->fdtab) @@ -213,7 +228,7 @@ static int expand_files(struct files_struct *files, unsigned int nr, bool shrink fdt = files_fdtable(files); /* Do we need to expand? */ - if (nr < fdt->max_fds) + if (nr < fdt->max_fds && !shrink) return expanded; /* Can we expand? 
*/ @@ -228,6 +243,15 @@ static int expand_files(struct files_struct *files, unsigned int nr, bool shrink goto repeat; } + if (unlikely(shrink)) { + unsigned int i; + i = find_last_bit(fdt->open_fds, fdt->max_fds); + nr = i; + i = fdtable_align(i); + if (i >= fdt->max_fds) + return expanded; + } + /* All good, so we try */ files->resize_in_progress = true; expanded = expand_fdtable(files, nr, shrink); @@ -352,7 +376,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int open_files = sane_fdtable_size(old_fdt, max_fds); } - copy_fd_bitmaps(new_fdt, old_fdt, open_files); + copy_fd_bitmaps(new_fdt, old_fdt, open_files, false); old_fds = old_fdt->fd; new_fds = new_fdt->fd; @@ -618,6 +642,13 @@ static struct file *pick_file(struct files_struct *files, unsigned fd) rcu_assign_pointer(fdt->fd[fd], NULL); __put_unused_fd(files, fd); + /* Try to shrink fdt and to free memory */ + if (unlikely(fd * 2 >= fdt->max_fds && + fd > (1024 / sizeof(struct file *))) && + get_exec_env() != get_ve0() && + get_exec_env()->is_pseudosuper) + expand_files(files, fd, true); + out_unlock: spin_unlock(&files->file_lock); return file; -- 1.8.3.1 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel