On Tue, Oct 3, 2017 at 9:45 PM, Oleg Nesterov <o...@redhat.com> wrote: > On 10/02, Andrew Morton wrote: >> >> From: Alexey Dobriyan <adobri...@gmail.com> >> Subject: pid: delete struct pidmap::nr_free >> >> There is a check in pid allocation code to skip a full page: >> >> if (likely(atomic_read(&map->nr_free))) { >> ... >> >> In practice it doesn't do anything. To skip a pidmap page one has to have >> 32K consecutive pids allocated at the same time which doesn't happen. >> >> Currently the price is _every_ fork/exit on every system being slower than >> necessary. > > Agreed, I too never understood how can this counter help. > > Add Gargi and Rik, the next version of "Replace PID bitmap allocation with IDR > API" can conflict with this and the previous pid-delete-reserved_pids.patch.
I think this patch will become obsolete as pidmap will be removed. As for the 1/2 patch of Alexey's series, I'll incorporate it so that rolled over PIDs start from 1 instead of RESERVED_PIDS. Thanks! Gargi > > Oleg. > >> >> Link: http://lkml.kernel.org/r/20170909203649.GB4791@avx2 >> Signed-off-by: Alexey Dobriyan <adobri...@gmail.com> >> Cc: "Eric W. Biederman" <ebied...@xmission.com> >> Cc: Kees Cook <keesc...@chromium.org> >> Cc: Oleg Nesterov <o...@redhat.com> >> Signed-off-by: Andrew Morton <a...@linux-foundation.org> >> --- >> >> include/linux/pid_namespace.h | 1 - >> kernel/pid.c | 28 ++++++++++------------------ >> kernel/pid_namespace.c | 6 ------ >> 3 files changed, 10 insertions(+), 25 deletions(-) >> >> diff -puN include/linux/pid_namespace.h~pid-delete-struct-pidmap-nr_free >> include/linux/pid_namespace.h >> --- a/include/linux/pid_namespace.h~pid-delete-struct-pidmap-nr_free >> +++ a/include/linux/pid_namespace.h >> @@ -11,7 +11,6 @@ >> #include <linux/ns_common.h> >> >> struct pidmap { >> - atomic_t nr_free; >> void *page; >> }; >> >> diff -puN kernel/pid.c~pid-delete-struct-pidmap-nr_free kernel/pid.c >> --- a/kernel/pid.c~pid-delete-struct-pidmap-nr_free >> +++ a/kernel/pid.c >> @@ -68,9 +68,6 @@ static inline int mk_pid(struct pid_name >> */ >> struct pid_namespace init_pid_ns = { >> .kref = KREF_INIT(2), >> - .pidmap = { >> - [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL >> } >> - }, >> .last_pid = 0, >> .nr_hashed = PIDNS_HASH_ADDING, >> .level = 0, >> @@ -106,7 +103,6 @@ static void free_pidmap(struct upid *upi >> int offset = nr & BITS_PER_PAGE_MASK; >> >> clear_bit(offset, map->page); >> - atomic_inc(&map->nr_free); >> } >> >> /* >> @@ -181,20 +177,17 @@ static int alloc_pidmap(struct pid_names >> if (unlikely(!map->page)) >> return -ENOMEM; >> } >> - if (likely(atomic_read(&map->nr_free))) { >> - for ( ; ; ) { >> - if (!test_and_set_bit(offset, map->page)) { >> - atomic_dec(&map->nr_free); >> - set_last_pid(pid_ns, last, pid); >> - return pid; >> - } >> - offset = find_next_offset(map, offset); >> - if (offset >= BITS_PER_PAGE) >> - break; >> - pid = mk_pid(pid_ns, map, offset); >> - if (pid >= pid_max) >> - break; >> + for (;;) { >> + if (!test_and_set_bit(offset, map->page)) { >> + set_last_pid(pid_ns, last, pid); >> + return pid; >> } >> + offset = find_next_offset(map, offset); >> + if (offset >= BITS_PER_PAGE) >> + break; >> + pid = mk_pid(pid_ns, map, offset); >> + if (pid >= pid_max) >> + break; >> } >> if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) { >> ++map; >> @@ -591,7 +584,6 @@ void __init pidmap_init(void) >> init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); >> /* Reserve PID 0. We never call free_pidmap(0) */ >> set_bit(0, init_pid_ns.pidmap[0].page); >> - atomic_dec(&init_pid_ns.pidmap[0].nr_free); >> >> init_pid_ns.pid_cachep = KMEM_CACHE(pid, >> SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT); >> diff -puN kernel/pid_namespace.c~pid-delete-struct-pidmap-nr_free >> kernel/pid_namespace.c >> --- a/kernel/pid_namespace.c~pid-delete-struct-pidmap-nr_free >> +++ a/kernel/pid_namespace.c >> @@ -98,7 +98,6 @@ static struct pid_namespace *create_pid_ >> struct pid_namespace *ns; >> unsigned int level = parent_pid_ns->level + 1; >> struct ucounts *ucounts; >> - int i; >> int err; >> >> err = -EINVAL; >> @@ -139,11 +138,6 @@ static struct pid_namespace *create_pid_ >> INIT_WORK(&ns->proc_work, proc_cleanup_work); >> >> set_bit(0, ns->pidmap[0].page); >> - atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); >> - >> - for (i = 1; i < PIDMAP_ENTRIES; i++) >> - atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); >> - >> return ns; >> >> out_free_map: >> _ >> >> Patches currently in -mm which might be from adobri...@gmail.com are >> >> proc-uninline-name_to_int.patch >> proc-use-do-while-in-name_to_int.patch >> seq_file-delete-small-value-optimization.patch >> pid-delete-reserved_pids.patch >> pid-delete-struct-pidmap-nr_free.patch >> >